From bd5f46aa242bb5dd48aad87dcbc7cda8b3ff8a74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hans=20W=C3=BCrfel?= <git@wuerfel.io>
Date: Fri, 28 Nov 2025 11:56:12 +0100
Subject: [PATCH 1/6] only build Cholesky and LU cache if possible

---
 ext/LinearSolveCUDAExt.jl         |  2 ++
 ext/LinearSolveSparseArraysExt.jl | 12 +++++++++---
 src/LinearSolve.jl                |  2 ++
 src/factorization.jl              |  8 +++++++-
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl
index 58e2c3444..7619c4e99 100644
--- a/ext/LinearSolveCUDAExt.jl
+++ b/ext/LinearSolveCUDAExt.jl
@@ -17,6 +17,8 @@ function LinearSolve.is_cusparse(A::Union{
         CUDA.CUSPARSE.CuSparseMatrixCSR, CUDA.CUSPARSE.CuSparseMatrixCSC})
     true
 end
+LinearSolve.is_cusparse_csr(::CUDA.CUSPARSE.CuSparseMatrixCSR) = true
+LinearSolve.is_cusparse_csc(::CUDA.CUSPARSE.CuSparseMatrixCSC) = true
 
 function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
         assump::OperatorAssumptions{Bool}) where {Tv, Ti}
diff --git a/ext/LinearSolveSparseArraysExt.jl b/ext/LinearSolveSparseArraysExt.jl
index f956539e3..17249618c 100644
--- a/ext/LinearSolveSparseArraysExt.jl
+++ b/ext/LinearSolveSparseArraysExt.jl
@@ -129,7 +129,7 @@ function LinearSolve.init_cacheval(
         maxiters::Int, abstol, reltol,
         verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES}
     if LinearSolve.is_cusparse(A)
-        ArrayInterface.lu_instance(A)
+        LinearSolve.cudss_loaded(A) ? ArrayInterface.lu_instance(A) : nothing
     else
         SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int64}(
             zero(Int64), zero(Int64), [Int64(1)], Int64[], T[]))
@@ -141,7 +141,7 @@ function LinearSolve.init_cacheval(
         maxiters::Int, abstol, reltol,
         verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES}
     if LinearSolve.is_cusparse(A)
-        ArrayInterface.lu_instance(A)
+        LinearSolve.cudss_loaded(A) ? ArrayInterface.lu_instance(A) : nothing
     else
         SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int32}(
             zero(Int32), zero(Int32), [Int32(1)], Int32[], T[]))
@@ -344,7 +344,13 @@ function LinearSolve.init_cacheval(alg::NormalCholeskyFactorization,
             Symmetric{T, <:AbstractSparseArray{T}}}, b, u, Pl, Pr,
         maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool},
         assumptions::OperatorAssumptions) where {T <: BLASELTYPES}
-    ArrayInterface.cholesky_instance(convert(AbstractMatrix, A))
+    if LinearSolve.is_cusparse_csc(A)
+        nothing
+    elseif LinearSolve.is_cusparse_csr(A) && !LinearSolve.cudss_loaded(A)
+        nothing
+    else
+        ArrayInterface.cholesky_instance(convert(AbstractMatrix, A))
+    end
 end
 
 # Specialize QR for the non-square case
diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
index 37ed234f4..f21bc7331 100644
--- a/src/LinearSolve.jl
+++ b/src/LinearSolve.jl
@@ -478,6 +478,8 @@ ALREADY_WARNED_CUDSS = Ref{Bool}(false)
 error_no_cudss_lu(A) = nothing
 cudss_loaded(A) = false
 is_cusparse(A) = false
+is_cusparse_csr(A) = false
+is_cusparse_csc(A) = false
 
 export LUFactorization, SVDFactorization, QRFactorization, GenericFactorization,
        GenericLUFactorization, SimpleLUFactorization, RFLUFactorization, ButterflyFactorization,
diff --git a/src/factorization.jl b/src/factorization.jl
index 144153123..ed7aaa723 100644
--- a/src/factorization.jl
+++ b/src/factorization.jl
@@ -395,7 +395,13 @@ end
 function init_cacheval(
         alg::CholeskyFactorization, A::AbstractArray{<:BLASELTYPES}, b, u, Pl, Pr,
         maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
-    ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot)
+    if LinearSolve.is_cusparse_csc(A)
+        nothing
+    elseif LinearSolve.is_cusparse_csr(A) && !LinearSolve.cudss_loaded(A)
+        nothing
+    else
+        ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot)
+    end
 end
 
 const PREALLOCATED_CHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot())

From b1a2c495945782815d88a9021457410d49588bdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hans=20W=C3=BCrfel?= <git@wuerfel.io>
Date: Fri, 28 Nov 2025 11:57:26 +0100
Subject: [PATCH 2/6] always fall back to Krylov for CuSparseCSC

---
 ext/LinearSolveCUDAExt.jl | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl
index 7619c4e99..9585efa15 100644
--- a/ext/LinearSolveCUDAExt.jl
+++ b/ext/LinearSolveCUDAExt.jl
@@ -33,6 +33,16 @@ function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
     end
 end
 
+function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC, b,
+        assump::OperatorAssumptions{Bool})
+    if LinearSolve.cudss_loaded(A)
+        @warn("CUDSS.jl does not support CuSparseMatrixCSC for LU Factorizations, consider using CuSparseMatrixCSR instead. Falling back to Krylov", maxlog=1)
+    else
+        @warn("CuSparseMatrixCSC does not support LU Factorization falling back to Krylov. Consider using CUDSS.jl together with CuSparseMatrixCSR", maxlog=1)
+    end
+    LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES)
+end
+
 function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)
     if !LinearSolve.cudss_loaded(A)
         error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.")

From 7769ca380d8b28b42596b993cf330d5588b5d5a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hans=20W=C3=BCrfel?= <git@wuerfel.io>
Date: Thu, 27 Nov 2025 15:41:01 +0100
Subject: [PATCH 3/6] fix CUSOLVERF test

---
 ext/LinearSolveCUSOLVERRFExt.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/LinearSolveCUSOLVERRFExt.jl b/ext/LinearSolveCUSOLVERRFExt.jl
index 55a7f5f35..2e4e2364f 100644
--- a/ext/LinearSolveCUSOLVERRFExt.jl
+++ b/ext/LinearSolveCUSOLVERRFExt.jl
@@ -1,6 +1,6 @@
 module LinearSolveCUSOLVERRFExt
 
-using LinearSolve: LinearSolve, @get_cacheval, pattern_changed, OperatorAssumptions
+using LinearSolve: LinearSolve, @get_cacheval, pattern_changed, OperatorAssumptions, LinearVerbosity
 using CUSOLVERRF: CUSOLVERRF, RFLU, CUDA
 using SparseArrays: SparseArrays, SparseMatrixCSC, nnz
 using CUSOLVERRF.CUDA.CUSPARSE: CuSparseMatrixCSR

From ded449e1114b6fa002971de61b37862ad5dbe843 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hans=20W=C3=BCrfel?= <git@wuerfel.io>
Date: Thu, 27 Nov 2025 15:41:24 +0100
Subject: [PATCH 4/6] add test for CuSparseMatrixCSR + Cholesky

---
 test/gpu/cuda.jl | 65 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 3 deletions(-)

diff --git a/test/gpu/cuda.jl b/test/gpu/cuda.jl
index 0d88bb741..1bb0fc71f 100644
--- a/test/gpu/cuda.jl
+++ b/test/gpu/cuda.jl
@@ -1,7 +1,66 @@
 using LinearSolve, CUDA, LinearAlgebra, SparseArrays, StableRNGs
-using CUDA.CUSPARSE, CUDSS
+using CUDA.CUSPARSE
 using Test
 
+@testset "Test default solver choice for CuSparse" begin
+    b = Float64[1, 2, 3, 4]
+    b_gpu = CUDA.adapt(CuArray, b)
+
+    A = Float64[1 1 0 0
+                0 1 1 0
+                0 0 3 1
+                0 0 0 4]
+    A_gpu_csr = CUDA.CUSPARSE.CuSparseMatrixCSR(sparse(A))
+    A_gpu_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(sparse(A))
+    prob_csr = LinearProblem(A_gpu_csr, b_gpu)
+    prob_csc = LinearProblem(A_gpu_csc, b_gpu)
+
+    A_sym = Float64[1 1 0 0
+                    1 0 0 2
+                    0 0 3 0
+                    0 2 0 0]
+    A_gpu_sym_csr = CUDA.CUSPARSE.CuSparseMatrixCSR(sparse(A_sym))
+    A_gpu_sym_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(sparse(A_sym))
+    prob_sym_csr = LinearProblem(A_gpu_sym_csr, b_gpu)
+    prob_sym_csc = LinearProblem(A_gpu_sym_csc, b_gpu)
+
+    @testset "Test without CUDSS loaded" begin
+        # assert CuDSS is not loaded yet
+        @test !LinearSolve.cudss_loaded(A_gpu_csr)
+        # csr fallback to krylov
+        alg = solve(prob_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # csc fallback to krylov
+        alg = solve(prob_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # csr symmetric fallback to krylov
+        alg = solve(prob_sym_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # csc symmetric fallback to krylov
+        alg = solve(prob_sym_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+    end
+
+    # after loading CUDSS, it should fall back tu LU factorization
+    using CUDSS
+
+    @testset "Test with CUDSS loaded" begin
+        @test LinearSolve.cudss_loaded(A_gpu_csr)
+        # csr uses LU
+        alg = solve(prob_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization
+        # csc fallback to krylov
+        alg = solve(prob_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # csr symmetric uses LU/cholesky
+        alg = solve(prob_sym_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization
+        # csc symetric fallback to krylov
+        alg = solve(prob_sym_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+    end
+end
+
 CUDA.allowscalar(false)
 
 n = 8
@@ -96,9 +155,9 @@ end
 @testset "CUDSS" begin
     T = Float32
     n = 100
-    A_cpu = sprand(T, n, n, 0.05) + I
+    A_cpu = sprand(rng, T, n, n, 0.05) + I
     x_cpu = zeros(T, n)
-    b_cpu = rand(T, n)
+    b_cpu = rand(rng, T, n)
 
     A_gpu_csr = CuSparseMatrixCSR(A_cpu)
     b_gpu = CuVector(b_cpu)

From a9b47c8b3b521359f699f5c5a656b3820c60853a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hans=20W=C3=BCrfel?= <git@wuerfel.io>
Date: Fri, 28 Nov 2025 12:11:00 +0100
Subject: [PATCH 5/6] raise CUDSS ver since cholesky requires recent fix

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 5f386a640..74f320730 100644
--- a/Project.toml
+++ b/Project.toml
@@ -86,7 +86,7 @@ ArrayInterface = "7.17"
 BandedMatrices = "1.8"
 BlockDiagonals = "0.2"
 CUDA = "5.5"
-CUDSS = "0.4, 0.6.1"
+CUDSS = "0.6.3"
 CUSOLVERRF = "0.2.6"
 ChainRulesCore = "1.25"
 CliqueTrees = "1.11.0"

From 3d0f5570ca5c9a267c5e238b6c7693deea961d56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hans=20W=C3=BCrfel?= <git@wuerfel.io>
Date: Fri, 28 Nov 2025 12:28:26 +0100
Subject: [PATCH 6/6] fix spelling

---
 test/gpu/cuda.jl | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/test/gpu/cuda.jl b/test/gpu/cuda.jl
index 1bb0fc71f..8a3edf8d1 100644
--- a/test/gpu/cuda.jl
+++ b/test/gpu/cuda.jl
@@ -41,7 +41,6 @@ using Test
         @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
     end
 
-    # after loading CUDSS, it should fall back tu LU factorization
     using CUDSS
 
     @testset "Test with CUDSS loaded" begin
@@ -55,7 +54,7 @@ using Test
         # csr symmetric uses LU/cholesky
         alg = solve(prob_sym_csr).alg
         @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization
-        # csc symetric fallback to krylov
+        # csc symmetric fallback to krylov
         alg = solve(prob_sym_csc).alg
         @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
     end