2 changes: 1 addition & 1 deletion Project.toml
@@ -86,7 +86,7 @@ ArrayInterface = "7.17"
 BandedMatrices = "1.8"
 BlockDiagonals = "0.2"
 CUDA = "5.5"
-CUDSS = "0.4, 0.6.1"
+CUDSS = "0.6.3"
 CUSOLVERRF = "0.2.6"
 ChainRulesCore = "1.25"
 CliqueTrees = "1.11.0"
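
Side note on the compat semantics, as a sketch using Pkg's `semver_spec` helper (its location under `Pkg.Types` is an assumption about the current Pkg layout, and the version numbers probed here are illustrative):

```julia
# Sketch: what the tightened CUDSS compat bound admits under Julia's
# semver rules. "0.6.3" is a caret specifier: 0.6.3 <= v < 0.7.0.
using Pkg

spec_new = Pkg.Types.semver_spec("0.6.3")
spec_old = Pkg.Types.semver_spec("0.4, 0.6.1")

@assert v"0.6.3" in spec_new
@assert !(v"0.6.1" in spec_new)   # pre-0.6.3 patch releases are now excluded
@assert v"0.4.5" in spec_old      # the old bound also allowed the 0.4 series
@assert !(v"0.4.5" in spec_new)
```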
12 changes: 12 additions & 0 deletions ext/LinearSolveCUDAExt.jl
@@ -17,6 +17,8 @@ function LinearSolve.is_cusparse(A::Union{
         CUDA.CUSPARSE.CuSparseMatrixCSR, CUDA.CUSPARSE.CuSparseMatrixCSC})
     true
 end
+LinearSolve.is_cusparse_csr(::CUDA.CUSPARSE.CuSparseMatrixCSR) = true
+LinearSolve.is_cusparse_csc(::CUDA.CUSPARSE.CuSparseMatrixCSC) = true
 
 function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
         assump::OperatorAssumptions{Bool}) where {Tv, Ti}
@@ -31,6 +33,16 @@ function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
     end
 end
 
+function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSC, b,
+        assump::OperatorAssumptions{Bool})
+    if LinearSolve.cudss_loaded(A)
+        @warn("CUDSS.jl does not support CuSparseMatrixCSC for LU factorizations; consider using CuSparseMatrixCSR instead. Falling back to Krylov.", maxlog=1)
+    else
+        @warn("CuSparseMatrixCSC does not support LU factorization; falling back to Krylov. Consider using CUDSS.jl together with CuSparseMatrixCSR.", maxlog=1)
+    end
+    LinearSolve.DefaultLinearSolver(LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES)
+end
+
 function LinearSolve.error_no_cudss_lu(A::CUDA.CUSPARSE.CuSparseMatrixCSR)
     if !LinearSolve.cudss_loaded(A)
         error("CUDSS.jl is required for LU Factorizations on CuSparseMatrixCSR. Please load this library.")
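
As a usage illustration of the new CSC path (a minimal sketch, assuming a CUDA-capable device; the matrix size and sparsity are arbitrary, not from this PR):

```julia
using LinearSolve, LinearAlgebra, SparseArrays, CUDA
using CUDA.CUSPARSE

A_cpu = sprand(Float64, 100, 100, 0.1) + I
A_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(A_cpu)  # CSC: no GPU LU available
b = CUDA.rand(Float64, 100)

prob = LinearProblem(A_csc, b)
sol = solve(prob)  # warns once, then solves with KrylovJL_GMRES
```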
2 changes: 1 addition & 1 deletion ext/LinearSolveCUSOLVERRFExt.jl
@@ -1,6 +1,6 @@
 module LinearSolveCUSOLVERRFExt
 
-using LinearSolve: LinearSolve, @get_cacheval, pattern_changed, OperatorAssumptions
+using LinearSolve: LinearSolve, @get_cacheval, pattern_changed, OperatorAssumptions, LinearVerbosity
 using CUSOLVERRF: CUSOLVERRF, RFLU, CUDA
 using SparseArrays: SparseArrays, SparseMatrixCSC, nnz
 using CUSOLVERRF.CUDA.CUSPARSE: CuSparseMatrixCSR
12 changes: 9 additions & 3 deletions ext/LinearSolveSparseArraysExt.jl
@@ -129,7 +129,7 @@ function LinearSolve.init_cacheval(
         maxiters::Int, abstol, reltol,
         verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES}
     if LinearSolve.is_cusparse(A)
-        ArrayInterface.lu_instance(A)
+        LinearSolve.cudss_loaded(A) ? ArrayInterface.lu_instance(A) : nothing
     else
         SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int64}(
             zero(Int64), zero(Int64), [Int64(1)], Int64[], T[]))
@@ -141,7 +141,7 @@ function LinearSolve.init_cacheval(
         maxiters::Int, abstol, reltol,
         verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions) where {T <: BLASELTYPES}
     if LinearSolve.is_cusparse(A)
-        ArrayInterface.lu_instance(A)
+        LinearSolve.cudss_loaded(A) ? ArrayInterface.lu_instance(A) : nothing
     else
         SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int32}(
             zero(Int32), zero(Int32), [Int32(1)], Int32[], T[]))
@@ -344,7 +344,13 @@ function LinearSolve.init_cacheval(alg::NormalCholeskyFactorization,
         Symmetric{T, <:AbstractSparseArray{T}}}, b, u, Pl, Pr,
         maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool},
         assumptions::OperatorAssumptions) where {T <: BLASELTYPES}
-    ArrayInterface.cholesky_instance(convert(AbstractMatrix, A))
+    if LinearSolve.is_cusparse_csc(A)
+        nothing
+    elseif LinearSolve.is_cusparse_csr(A) && !LinearSolve.cudss_loaded(A)
+        nothing
+    else
+        ArrayInterface.cholesky_instance(convert(AbstractMatrix, A))
+    end
 end
 
 # Specialize QR for the non-square case
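
The branches above encode a small guard pattern: eagerly build the factorization cache only when a backend can actually factorize this matrix type, and return `nothing` otherwise. A self-contained toy sketch of the idea (all names hypothetical, not LinearSolve's API):

```julia
using LinearAlgebra

# Hypothetical trait, mirroring cudss_loaded/is_cusparse: does some backend
# support factorizing this matrix type?
can_factorize(::Matrix{Float64}) = true
can_factorize(A) = false   # conservative fallback

# Eagerly build an LU cache only when supported; `nothing` means
# "no cache, the solver must pick another path".
init_cache(A) = can_factorize(A) ? lu(A) : nothing

@show init_cache(rand(3, 3)) !== nothing   # true: dense LU cache built
@show init_cache(Diagonal(ones(3)))        # nothing: no eager cache
```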
2 changes: 2 additions & 0 deletions src/LinearSolve.jl
@@ -478,6 +478,8 @@ ALREADY_WARNED_CUDSS = Ref{Bool}(false)
 error_no_cudss_lu(A) = nothing
 cudss_loaded(A) = false
 is_cusparse(A) = false
+is_cusparse_csr(A) = false
+is_cusparse_csc(A) = false
 
 export LUFactorization, SVDFactorization, QRFactorization, GenericFactorization,
     GenericLUFactorization, SimpleLUFactorization, RFLUFactorization, ButterflyFactorization,
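
These fallbacks follow the usual package-extension trait pattern: the base package returns `false` for every type, and the CUDA extension adds `true` methods for the types it owns (see the `is_cusparse_csr`/`is_cusparse_csc` methods in ext/LinearSolveCUDAExt.jl above). A self-contained toy version, assuming nothing beyond base Julia:

```julia
module BasePkg
is_csr(A) = false   # conservative fallback, like is_cusparse_csr above
end

module GPUExt
import ..BasePkg
struct FakeCSR end                  # stand-in for CuSparseMatrixCSR
BasePkg.is_csr(::FakeCSR) = true    # the extension owns this method
end

BasePkg.is_csr(rand(2, 2))          # false
BasePkg.is_csr(GPUExt.FakeCSR())    # true
```

The payoff is that the base package can branch on the trait without ever depending on CUDA; loading the extension flips the answer for GPU types only.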
8 changes: 7 additions & 1 deletion src/factorization.jl
@@ -395,7 +395,13 @@ end
 function init_cacheval(
         alg::CholeskyFactorization, A::AbstractArray{<:BLASELTYPES}, b, u, Pl, Pr,
         maxiters::Int, abstol, reltol, verbose::Union{LinearVerbosity, Bool}, assumptions::OperatorAssumptions)
-    ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot)
+    if LinearSolve.is_cusparse_csc(A)
+        nothing
+    elseif LinearSolve.is_cusparse_csr(A) && !LinearSolve.cudss_loaded(A)
+        nothing
+    else
+        ArrayInterface.cholesky_instance(convert(AbstractMatrix, A), alg.pivot)
+    end
 end
 
 const PREALLOCATED_CHOLESKY = ArrayInterface.cholesky_instance(rand(1, 1), NoPivot())
64 changes: 61 additions & 3 deletions test/gpu/cuda.jl
@@ -1,7 +1,65 @@
 using LinearSolve, CUDA, LinearAlgebra, SparseArrays, StableRNGs
-using CUDA.CUSPARSE, CUDSS
+using CUDA.CUSPARSE
 using Test
 
+@testset "Test default solver choice for CuSparse" begin
+    b = Float64[1, 2, 3, 4]
+    b_gpu = CUDA.adapt(CuArray, b)
+
+    A = Float64[1 1 0 0
+                0 1 1 0
+                0 0 3 1
+                0 0 0 4]
+    A_gpu_csr = CUDA.CUSPARSE.CuSparseMatrixCSR(sparse(A))
+    A_gpu_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(sparse(A))
+    prob_csr = LinearProblem(A_gpu_csr, b_gpu)
+    prob_csc = LinearProblem(A_gpu_csc, b_gpu)
+
+    A_sym = Float64[1 1 0 0
+                    1 0 0 2
+                    0 0 3 0
+                    0 2 0 0]
+    A_gpu_sym_csr = CUDA.CUSPARSE.CuSparseMatrixCSR(sparse(A_sym))
+    A_gpu_sym_csc = CUDA.CUSPARSE.CuSparseMatrixCSC(sparse(A_sym))
+    prob_sym_csr = LinearProblem(A_gpu_sym_csr, b_gpu)
+    prob_sym_csc = LinearProblem(A_gpu_sym_csc, b_gpu)
+
+    @testset "Test without CUDSS loaded" begin
+        # assert that CUDSS is not loaded yet
+        @test !LinearSolve.cudss_loaded(A_gpu_csr)
+        # CSR falls back to Krylov
+        alg = solve(prob_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # CSC falls back to Krylov
+        alg = solve(prob_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # symmetric CSR falls back to Krylov
+        alg = solve(prob_sym_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # symmetric CSC falls back to Krylov
+        alg = solve(prob_sym_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+    end
+
+    using CUDSS
+
+    @testset "Test with CUDSS loaded" begin
+        @test LinearSolve.cudss_loaded(A_gpu_csr)
+        # CSR uses LU
+        alg = solve(prob_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization
+        # CSC falls back to Krylov
+        alg = solve(prob_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+        # symmetric CSR uses LU/Cholesky
+        alg = solve(prob_sym_csr).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.LUFactorization
+        # symmetric CSC falls back to Krylov
+        alg = solve(prob_sym_csc).alg
+        @test alg.alg == LinearSolve.DefaultAlgorithmChoice.KrylovJL_GMRES
+    end
+end
+
 CUDA.allowscalar(false)
 
 n = 8
@@ -96,9 +154,9 @@ end
 @testset "CUDSS" begin
     T = Float32
     n = 100
-    A_cpu = sprand(T, n, n, 0.05) + I
+    A_cpu = sprand(rng, T, n, n, 0.05) + I
     x_cpu = zeros(T, n)
-    b_cpu = rand(T, n)
+    b_cpu = rand(rng, T, n)
 
     A_gpu_csr = CuSparseMatrixCSR(A_cpu)
     b_gpu = CuVector(b_cpu)