Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add triangular operators for AMD and NVIDIA GPUs #41

Merged
merged 5 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 97 additions & 11 deletions ext/AMDGPU/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,19 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
@eval begin
function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
m,n = size(A)
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
if nrhs == 1
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
descX = rocSPARSE.ROCDenseVectorDescriptor(T, n)
descY = rocSPARSE.ROCDenseVectorDescriptor(T, m)
algo = rocSPARSE.rocSPARSE.rocsparse_spmv_alg_default
buffer_size = Ref{Csize_t}()
rocSPARSE.rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size, C_NULL)
rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), transa, alpha, descA, descX,
beta, descY, T, algo, buffer_size, C_NULL)
buffer = ROCVector{UInt8}(undef, buffer_size[])
return AMD_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
else
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
descX = rocSPARSE.ROCDenseMatrixDescriptor(T, n, nrhs)
descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs)
algo = rocSPARSE.rocsparse_spmm_alg_default
Expand Down Expand Up @@ -62,8 +60,10 @@ function LinearAlgebra.mul!(y::ROCVector{T}, A::AMD_KrylovOperator{T}, x::ROCVec
descY = rocSPARSE.ROCDenseVectorDescriptor(y)
descX = rocSPARSE.ROCDenseVectorDescriptor(x)
algo = rocSPARSE.rocsparse_spmv_alg_default
rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX,
Ref{T}(zero(T)), descY, T, algo, A.buffer_size, A.buffer)
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), A.transa, alpha, A.descA, descX,
beta, descY, T, algo, A.buffer_size, A.buffer)
end

function LinearAlgebra.mul!(Y::ROCMatrix{T}, A::AMD_KrylovOperator{T}, X::ROCMatrix{T}) where T <: BlasFloat
Expand All @@ -75,6 +75,92 @@ function LinearAlgebra.mul!(Y::ROCMatrix{T}, A::AMD_KrylovOperator{T}, X::ROCMat
descY = rocSPARSE.ROCDenseMatrixDescriptor(Y)
descX = rocSPARSE.ROCDenseMatrixDescriptor(X)
algo = rocSPARSE.rocsparse_spmm_alg_default
rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), A.transa, 'N', Ref{T}(one(T)), A.descA, descX,
Ref{T}(zero(T)), descY, T, algo, rocSPARSE.rocsparse_spmm_stage_compute, A.buffer_size, A.buffer)
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX,
beta, descY, T, algo, rocSPARSE.rocsparse_spmm_stage_compute, A.buffer_size, A.buffer)
end

# GPU-resident triangular operator for AMD devices.  Wraps a rocSPARSE matrix
# descriptor together with the work buffer sized/filled during the SpSV/SpSM
# buffer-size and preprocess stages, so repeated `ldiv!` calls only run the
# compute stage.  Mutable so `update!` can swap descriptor pointers in place.
mutable struct AMD_TriangularOperator{T} <: AbstractTriangularOperator{T}
type::Type{T}  # scalar element type (same as the T parameter)
m::Int         # number of rows of A
n::Int         # number of columns of A
nrhs::Int      # number of right-hand sides the analysis was performed for
transa::Char   # operation applied to A ('N' by default at construction)
descA::rocSPARSE.ROCSparseMatrixDescriptor  # rocSPARSE descriptor of A
buffer_size::Ref{Csize_t}                   # size of `buffer`, as reported by rocSPARSE
buffer::ROCVector{UInt8}                    # device work buffer reused by every solve
end

# Array-interface accessors for the operator.
# NOTE(review): written without a `Base.` qualifier — assumes `Base.eltype` and
# `Base.size` are imported into this module elsewhere in the file; otherwise
# these define module-local shadows. TODO confirm against the file's imports.
eltype(A::AMD_TriangularOperator{T}) where T = T
size(A::AMD_TriangularOperator) = (A.m, A.n)

for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
                                     (:(ROCSparseMatrixCOO{T}), :BlasFloat))
    @eval begin
        # Build a triangular operator backed by rocSPARSE SpSV (nrhs == 1) or
        # SpSM (nrhs > 1).  `uplo` selects the lower ('L') or upper ('U')
        # triangle and `diag` marks the diagonal as unit ('U') or not ('N').
        # The buffer-size and preprocess stages run once here so that every
        # subsequent `ldiv!` only executes the compute stage.
        function KP.TriangularOperator(A::$SparseMatrixType, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N') where T <: $BlasType
            m,n = size(A)
            alpha = Ref{T}(one(T))
            descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
            # Record fill mode and diagonal type on the matrix descriptor.
            rocsparse_uplo = Ref{rocSPARSE.rocsparse_fill_mode}(uplo)
            rocsparse_diag = Ref{rocSPARSE.rocsparse_diag_type}(diag)
            rocSPARSE.rocsparse_spmat_set_attribute(descA, rocSPARSE.rocsparse_spmat_fill_mode, rocsparse_uplo, Csize_t(sizeof(rocsparse_uplo)))
            rocSPARSE.rocsparse_spmat_set_attribute(descA, rocSPARSE.rocsparse_spmat_diag_type, rocsparse_diag, Csize_t(sizeof(rocsparse_diag)))
            if nrhs == 1
                # Single right-hand side: sparse triangular solve with a vector.
                descX = rocSPARSE.ROCDenseVectorDescriptor(T, n)
                descY = rocSPARSE.ROCDenseVectorDescriptor(T, m)
                algo = rocSPARSE.rocsparse_spsv_alg_default
                buffer_size = Ref{Csize_t}()
                rocSPARSE.rocsparse_spsv(rocSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo,
                                         rocSPARSE.rocsparse_spsv_stage_buffer_size, buffer_size, C_NULL)
                buffer = ROCVector{UInt8}(undef, buffer_size[])
                rocSPARSE.rocsparse_spsv(rocSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo,
                                         rocSPARSE.rocsparse_spsv_stage_preprocess, buffer_size, buffer)
                return AMD_TriangularOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
            else
                # Multiple right-hand sides: sparse triangular solve with a matrix.
                descX = rocSPARSE.ROCDenseMatrixDescriptor(T, n, nrhs)
                descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs)
                algo = rocSPARSE.rocsparse_spsm_alg_default
                buffer_size = Ref{Csize_t}()
                rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo,
                                         rocSPARSE.rocsparse_spsm_stage_buffer_size, buffer_size, C_NULL)
                buffer = ROCVector{UInt8}(undef, buffer_size[])
                rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo,
                                         rocSPARSE.rocsparse_spsm_stage_preprocess, buffer_size, buffer)
                return AMD_TriangularOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
            end
        end

        # Point the stored descriptor at B's arrays so the operator reuses the
        # analysis done at construction with new values (same sparsity pattern
        # assumed — TODO confirm rocSPARSE requires an identical pattern here).
        # Fixed: interpolate the loop variable `$BlasType` (was `$BlasFloat`,
        # which only worked because the global `BlasFloat` binding coincides
        # with the interpolated symbol's value) — consistent with the
        # constructor above.
        function KP.update!(A::AMD_TriangularOperator{T}, B::$SparseMatrixType) where T <: $BlasType
            (B isa ROCSparseMatrixCOO) && rocSPARSE.rocsparse_coo_set_pointers(A.descA, B.rowInd, B.colInd, B.nzVal)
            (B isa ROCSparseMatrixCSR) && rocSPARSE.rocsparse_csr_set_pointers(A.descA, B.rowPtr, B.colVal, B.nzVal)
            return A
        end
    end
end

# Triangular solve y = op(A) \ x on an AMD GPU (single right-hand side).
# Only the SpSV compute stage runs here; buffer sizing and preprocessing were
# performed when the operator was constructed.
function LinearAlgebra.ldiv!(y::ROCVector{T}, A::AMD_TriangularOperator{T}, x::ROCVector{T}) where T <: BlasFloat
    length(y) == A.m || throw(DimensionMismatch("length(y) != A.m"))
    length(x) == A.n || throw(DimensionMismatch("length(x) != A.n"))
    A.nrhs == 1 || throw(DimensionMismatch("A.nrhs != 1"))
    one_T = Ref{T}(one(T))
    y_desc = rocSPARSE.ROCDenseVectorDescriptor(y)
    x_desc = rocSPARSE.ROCDenseVectorDescriptor(x)
    spsv_algo = rocSPARSE.rocsparse_spsv_alg_default
    rocSPARSE.rocsparse_spsv(rocSPARSE.handle(), A.transa, one_T, A.descA, x_desc, y_desc, T,
                             spsv_algo, rocSPARSE.rocsparse_spsv_stage_compute, A.buffer_size, A.buffer)
end

# Triangular solve Y = op(A) \ X on an AMD GPU (multiple right-hand sides).
# Runs only the SpSM compute stage; the analysis happened at construction.
function LinearAlgebra.ldiv!(Y::ROCMatrix{T}, A::AMD_TriangularOperator{T}, X::ROCMatrix{T}) where T <: BlasFloat
    (rowsY, colsY) = size(Y)
    (rowsX, colsX) = size(X)
    rowsY == A.m || throw(DimensionMismatch("mY != A.m"))
    rowsX == A.n || throw(DimensionMismatch("mX != A.n"))
    (colsY == colsX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs"))
    one_T = Ref{T}(one(T))
    Y_desc = rocSPARSE.ROCDenseMatrixDescriptor(Y)
    X_desc = rocSPARSE.ROCDenseMatrixDescriptor(X)
    spsm_algo = rocSPARSE.rocsparse_spsm_alg_default
    rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), A.transa, 'N', one_T, A.descA, X_desc, Y_desc, T,
                             spsm_algo, rocSPARSE.rocsparse_spsm_stage_compute, A.buffer_size, A.buffer)
end
108 changes: 97 additions & 11 deletions ext/CUDA/operators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
@eval begin
function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
m,n = size(A)
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
if nrhs == 1
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
descX = CUSPARSE.CuDenseVectorDescriptor(T, n)
descY = CUSPARSE.CuDenseVectorDescriptor(T, m)
algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
Expand All @@ -29,18 +29,14 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
buffer = CuVector{UInt8}(undef, buffer_size[])
return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
else
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs)
descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs)
algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
buffer_size = Ref{Csize_t}()
transb = 'N'
CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, beta, descY, T, algo, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
if !(A isa CuSparseMatrixCOO)
CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, beta, descY, T, algo, buffer)
end
return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
end
Expand All @@ -61,7 +57,9 @@ function LinearAlgebra.mul!(y::CuVector{T}, A::NVIDIA_KrylovOperator{T}, x::CuVe
descY = CUSPARSE.CuDenseVectorDescriptor(y)
descX = CUSPARSE.CuDenseVectorDescriptor(x)
algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer)
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, alpha, A.descA, descX, beta, descY, T, algo, A.buffer)
end

function LinearAlgebra.mul!(Y::CuMatrix{T}, A::NVIDIA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
Expand All @@ -73,5 +71,93 @@ function LinearAlgebra.mul!(Y::CuMatrix{T}, A::NVIDIA_KrylovOperator{T}, X::CuMa
descY = CUSPARSE.CuDenseMatrixDescriptor(Y)
descX = CUSPARSE.CuDenseMatrixDescriptor(X)
algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer)
alpha = Ref{T}(one(T))
beta = Ref{T}(zero(T))
CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX, beta, descY, T, algo, A.buffer)
end

# GPU-resident triangular operator for NVIDIA devices.  The second type
# parameter S is the cuSPARSE solve descriptor type: CuSparseSpSVDescriptor
# when the operator was built with nrhs == 1, CuSparseSpSMDescriptor
# otherwise.  Mutable so `update!` can replace `descA`.
mutable struct NVIDIA_TriangularOperator{T,S} <: AbstractTriangularOperator{T}
type::Type{T}  # scalar element type (same as the T parameter)
m::Int         # number of rows of A
n::Int         # number of columns of A
nrhs::Int      # number of right-hand sides the analysis was performed for
transa::Char   # operation applied to A ('N' by default at construction)
descA::CUSPARSE.CuSparseMatrixDescriptor  # cuSPARSE descriptor of A
descT::S                                  # SpSV/SpSM descriptor holding the analysis
buffer::CuVector{UInt8}                   # device work buffer reused by every solve
end

# Array-interface accessors for the operator.
# NOTE(review): written without a `Base.` qualifier — assumes `Base.eltype` and
# `Base.size` are imported into this module elsewhere in the file; otherwise
# these define module-local shadows. TODO confirm against the file's imports.
eltype(A::NVIDIA_TriangularOperator{T}) where T = T
size(A::NVIDIA_TriangularOperator) = (A.m, A.n)

for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                                     (:(CuSparseMatrixCOO{T}), :BlasFloat))
    @eval begin
        # Build a triangular operator backed by cuSPARSE SpSV (nrhs == 1) or
        # SpSM (nrhs > 1).  The buffer-size and analysis stages run once here
        # so that every subsequent `ldiv!` only executes the solve stage.
        function KP.TriangularOperator(A::$SparseMatrixType, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N') where T <: $BlasType
            m,n = size(A)
            alpha = Ref{T}(one(T))
            descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
            # Record fill mode ('F') and diagonal type ('D') on the descriptor.
            cusparse_uplo = Ref{CUSPARSE.cusparseFillMode_t}(uplo)
            cusparse_diag = Ref{CUSPARSE.cusparseDiagType_t}(diag)
            CUSPARSE.cusparseSpMatSetAttribute(descA, 'F', cusparse_uplo, Csize_t(sizeof(cusparse_uplo)))
            CUSPARSE.cusparseSpMatSetAttribute(descA, 'D', cusparse_diag, Csize_t(sizeof(cusparse_diag)))
            if nrhs == 1
                # Single right-hand side: sparse triangular solve with a vector.
                descT = CUSPARSE.CuSparseSpSVDescriptor()
                descX = CUSPARSE.CuDenseVectorDescriptor(T, n)
                descY = CUSPARSE.CuDenseVectorDescriptor(T, m)
                algo = CUSPARSE.CUSPARSE_SPSV_ALG_DEFAULT
                buffer_size = Ref{Csize_t}()
                CUSPARSE.cusparseSpSV_bufferSize(CUSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo, descT, buffer_size)
                buffer = CuVector{UInt8}(undef, buffer_size[])
                CUSPARSE.cusparseSpSV_analysis(CUSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo, descT, buffer)
                return NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSVDescriptor}(T, m, n, nrhs, transa, descA, descT, buffer)
            else
                # Multiple right-hand sides: sparse triangular solve with a matrix.
                descT = CUSPARSE.CuSparseSpSMDescriptor()
                descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs)
                descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs)
                algo = CUSPARSE.CUSPARSE_SPSM_ALG_DEFAULT
                buffer_size = Ref{Csize_t}()
                CUSPARSE.cusparseSpSM_bufferSize(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo, descT, buffer_size)
                buffer = CuVector{UInt8}(undef, buffer_size[])
                CUSPARSE.cusparseSpSM_analysis(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo, descT, buffer)
                return NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSMDescriptor}(T, m, n, nrhs, transa, descA, descT, buffer)
            end
        end

        # Refresh a SpSV operator's values without redoing the analysis.
        # The guard checks the cuSPARSE *library* version; v12.2 presumably
        # corresponds to the CUDA v12.3 toolkit mentioned in the message —
        # TODO confirm the library/toolkit version mapping.
        # Fixed: grammar in the user-facing error message, and `$BlasType`
        # (was `$BlasFloat`) for consistency with the constructor above.
        function KP.update!(A::NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSVDescriptor}, B::$SparseMatrixType) where T <: $BlasType
            CUSPARSE.version() ≥ v"12.2" || error("This operation is only supported by CUDA ≥ v12.3")
            descB = CUSPARSE.CuSparseMatrixDescriptor(B, 'O')
            A.descA = descB
            CUSPARSE.cusparseSpSV_updateMatrix(CUSPARSE.handle(), A.descT, B.nzVal, 'G')
            return A
        end

        # SpSM operators (nrhs > 1) cannot be updated in place yet.
        function KP.update!(A::NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSMDescriptor}, B::$SparseMatrixType) where T <: $BlasType
            return error("This operation will be supported in CUDA v12.4")
        end
    end
end

# Triangular solve y = op(A) \ x on an NVIDIA GPU (single right-hand side).
# Only the SpSV solve stage runs here; sizing and analysis were performed
# when the operator was constructed.
function LinearAlgebra.ldiv!(y::CuVector{T}, A::NVIDIA_TriangularOperator{T}, x::CuVector{T}) where T <: BlasFloat
    length(y) == A.m || throw(DimensionMismatch("length(y) != A.m"))
    length(x) == A.n || throw(DimensionMismatch("length(x) != A.n"))
    A.nrhs == 1 || throw(DimensionMismatch("A.nrhs != 1"))
    one_T = Ref{T}(one(T))
    y_desc = CUSPARSE.CuDenseVectorDescriptor(y)
    x_desc = CUSPARSE.CuDenseVectorDescriptor(x)
    spsv_algo = CUSPARSE.CUSPARSE_SPSV_ALG_DEFAULT
    CUSPARSE.cusparseSpSV_solve(CUSPARSE.handle(), A.transa, one_T, A.descA, x_desc, y_desc, T, spsv_algo, A.descT)
end

# Triangular solve Y = op(A) \ X on an NVIDIA GPU (multiple right-hand sides).
# Runs only the SpSM solve stage; the analysis happened at construction.
function LinearAlgebra.ldiv!(Y::CuMatrix{T}, A::NVIDIA_TriangularOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
    (rowsY, colsY) = size(Y)
    (rowsX, colsX) = size(X)
    rowsY == A.m || throw(DimensionMismatch("mY != A.m"))
    rowsX == A.n || throw(DimensionMismatch("mX != A.n"))
    (colsY == colsX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs"))
    one_T = Ref{T}(one(T))
    Y_desc = CUSPARSE.CuDenseMatrixDescriptor(Y)
    X_desc = CUSPARSE.CuDenseMatrixDescriptor(X)
    spsm_algo = CUSPARSE.CUSPARSE_SPSM_ALG_DEFAULT
    CUSPARSE.cusparseSpSM_solve(CUSPARSE.handle(), A.transa, 'N', one_T, A.descA, X_desc, Y_desc, T, spsm_algo, A.descT)
end
6 changes: 6 additions & 0 deletions src/KrylovPreconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ export AbstractKrylovPreconditioner
abstract type AbstractKrylovOperator{T} end
export AbstractKrylovOperator

# Root type for triangular operators; concrete subtypes live in the GPU
# extensions (AMD / NVIDIA) elsewhere in this package.
abstract type AbstractTriangularOperator{T} end
export AbstractTriangularOperator

# Fallback methods: concrete preconditioner/operator types are expected to
# provide their own `update!`; reaching these is a usage error.
update!(p::AbstractKrylovPreconditioner, A::SparseMatrixCSC) = error("update!() for $(typeof(p)) is not implemented.")
update!(p::AbstractKrylovPreconditioner, A) = error("update!() for $(typeof(p)) is not implemented.")
update!(p::AbstractKrylovOperator, A::SparseMatrixCSC) = error("update!() for $(typeof(p)) is not implemented.")
Expand All @@ -33,6 +36,9 @@ end
function KrylovOperator end
export KrylovOperator

"""
    TriangularOperator(A, uplo, diag; nrhs=1, transa='N')

Create a triangular operator from a sparse matrix `A`, for use with `ldiv!`.
`uplo` selects the lower (`'L'`) or upper (`'U'`) triangle and `diag` marks
the diagonal as unit (`'U'`) or not (`'N'`).  Methods are provided by the
GPU package extensions (AMD and NVIDIA).
"""
function TriangularOperator end
export TriangularOperator

# Preconditioners
include("ic0.jl")
include("ilu0.jl")
Expand Down
9 changes: 9 additions & 0 deletions test/gpu/amd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ include("gpu.jl")
end
end

# Triangular solves on AMD GPUs, for both supported sparse storage formats
# and both real and complex double precision.
@testset "TriangularOperator" begin
@testset "ROCSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
test_triangular(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCOO{FC})
end
@testset "ROCSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
test_triangular(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCSR{FC})
end
end

@testset "Block Jacobi preconditioner" begin
test_block_jacobi(ROCBackend(), ROCArray, ROCSparseMatrixCSR)
end
Expand Down
65 changes: 65 additions & 0 deletions test/gpu/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,71 @@ function test_operator(FC, V, DM, SM)
end
end

# Exercise TriangularOperator against the CPU reference `ldiv!` for all four
# uplo/diag combinations.
#   FC : scalar type; V : GPU vector type; DM : GPU dense matrix type;
#   SM : GPU sparse matrix type.
function test_triangular(FC, V, DM, SM)
n = 100
for (uplo, diag, triangle) in [('L', 'U', UnitLowerTriangular),
('L', 'N', LowerTriangular ),
('U', 'U', UnitUpperTriangular),
('U', 'N', UpperTriangular )]
# Build a random triangle; force a unit diagonal when diag == 'U'; store sparse.
A_cpu = rand(FC, n, n)
A_cpu = uplo == 'L' ? tril(A_cpu) : triu(A_cpu)
A_cpu = diag == 'U' ? A_cpu - Diagonal(A_cpu) + I : A_cpu
A_cpu = sparse(A_cpu)
b_cpu = rand(FC, n)

A_gpu = SM(A_cpu)
b_gpu = V(b_cpu)
# Single right-hand side: GPU solve must match the CPU triangular solve.
opA_gpu = TriangularOperator(A_gpu, uplo, diag)
for i = 1:5
y_cpu = rand(FC, n)
x_cpu = rand(FC, n)
ldiv!(y_cpu, triangle(A_cpu), x_cpu)
y_gpu = V(y_cpu)
x_gpu = V(x_cpu)
ldiv!(y_gpu, opA_gpu, x_gpu)
@test collect(y_gpu) ≈ y_cpu
end
# Rescale the off-diagonal entries (same sparsity pattern) and check that
# update! refreshes the operator's values in place.
for j = 1:5
y_cpu = rand(FC, n)
x_cpu = rand(FC, n)
A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
ldiv!(y_cpu, triangle(A_cpu2), x_cpu)
y_gpu = V(y_cpu)
x_gpu = V(x_cpu)
A_gpu2 = SM(A_cpu2)
update!(opA_gpu, A_gpu2)
ldiv!(y_gpu, opA_gpu, x_gpu)
@test collect(y_gpu) ≈ y_cpu
end

# Multiple right-hand sides (matrix solve path).
nrhs = 3
opA_gpu = TriangularOperator(A_gpu, uplo, diag; nrhs)
for i = 1:5
Y_cpu = rand(FC, n, nrhs)
X_cpu = rand(FC, n, nrhs)
ldiv!(Y_cpu, triangle(A_cpu), X_cpu)
Y_gpu = DM(Y_cpu)
X_gpu = DM(X_cpu)
ldiv!(Y_gpu, opA_gpu, X_gpu)
@test collect(Y_gpu) ≈ Y_cpu
end
# Skip the multi-RHS update! test for CUDA: the NVIDIA extension's SpSM
# update! errors ("will be supported in CUDA v12.4").
# NOTE(review): `V.body.name.name` digs into UnionAll internals to detect
# CuArray — fragile across CUDA.jl/Julia versions; a dispatch- or
# string-based check would be sturdier. TODO confirm.
if V.body.name.name != :CuArray
for j = 1:5
Y_cpu = rand(FC, n, nrhs)
X_cpu = rand(FC, n, nrhs)
A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
ldiv!(Y_cpu, triangle(A_cpu2), X_cpu)
Y_gpu = DM(Y_cpu)
X_gpu = DM(X_cpu)
A_gpu2 = SM(A_cpu2)
update!(opA_gpu, A_gpu2)
ldiv!(Y_gpu, opA_gpu, X_gpu)
@test collect(Y_gpu) ≈ Y_cpu
end
end
end
end

# Dense vector type paired with a CPU SparseMatrixCSC in these tests
# (always Float64 storage for the CPU reference path).
_get_type(J::SparseMatrixCSC) = Vector{Float64}

function generate_random_system(n::Int, m::Int)
Expand Down
9 changes: 9 additions & 0 deletions test/gpu/nvidia.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ include("gpu.jl")
end
end

# Triangular solves on NVIDIA GPUs, for both supported sparse storage formats
# and both real and complex double precision.
@testset "TriangularOperator" begin
@testset "CuSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
test_triangular(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCOO{FC})
end
@testset "CuSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
test_triangular(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCSR{FC})
end
end

@testset "Block Jacobi preconditioner" begin
test_block_jacobi(CUDABackend(), CuArray, CuSparseMatrixCSR)
end
Expand Down