diff --git a/ext/AMDGPU/operators.jl b/ext/AMDGPU/operators.jl index 7cb767d..c5a7f77 100644 --- a/ext/AMDGPU/operators.jl +++ b/ext/AMDGPU/operators.jl @@ -18,21 +18,19 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat), @eval begin function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType m,n = size(A) + alpha = Ref{T}(one(T)) + beta = Ref{T}(zero(T)) + descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O') if nrhs == 1 - alpha = Ref{T}(one(T)) - beta = Ref{T}(zero(T)) - descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O') descX = rocSPARSE.ROCDenseVectorDescriptor(T, n) descY = rocSPARSE.ROCDenseVectorDescriptor(T, m) algo = rocSPARSE.rocSPARSE.rocsparse_spmv_alg_default buffer_size = Ref{Csize_t}() - rocSPARSE.rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size, C_NULL) + rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), transa, alpha, descA, descX, + beta, descY, T, algo, buffer_size, C_NULL) buffer = ROCVector{UInt8}(undef, buffer_size[]) return AMD_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer) else - alpha = Ref{T}(one(T)) - beta = Ref{T}(zero(T)) - descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O') descX = rocSPARSE.ROCDenseMatrixDescriptor(T, n, nrhs) descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs) algo = rocSPARSE.rocsparse_spmm_alg_default @@ -62,8 +60,10 @@ function LinearAlgebra.mul!(y::ROCVector{T}, A::AMD_KrylovOperator{T}, x::ROCVec descY = rocSPARSE.ROCDenseVectorDescriptor(y) descX = rocSPARSE.ROCDenseVectorDescriptor(x) algo = rocSPARSE.rocsparse_spmv_alg_default - rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX, - Ref{T}(zero(T)), descY, T, algo, A.buffer_size, A.buffer) + alpha = Ref{T}(one(T)) + beta = Ref{T}(zero(T)) + rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), A.transa, alpha, A.descA, descX, + beta, descY, T, algo, A.buffer_size, A.buffer) end function LinearAlgebra.mul!(Y::ROCMatrix{T}, A::AMD_KrylovOperator{T}, X::ROCMatrix{T}) where T <: BlasFloat @@ -75,6 +75,92 @@ function LinearAlgebra.mul!(Y::ROCMatrix{T}, A::AMD_KrylovOperator{T}, X::ROCMat descY = rocSPARSE.ROCDenseMatrixDescriptor(Y) descX = rocSPARSE.ROCDenseMatrixDescriptor(X) algo = rocSPARSE.rocsparse_spmm_alg_default - rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), A.transa, 'N', Ref{T}(one(T)), A.descA, descX, - Ref{T}(zero(T)), descY, T, algo, rocSPARSE.rocsparse_spmm_stage_compute, A.buffer_size, A.buffer) + alpha = Ref{T}(one(T)) + beta = Ref{T}(zero(T)) + rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX, + beta, descY, T, algo, rocSPARSE.rocsparse_spmm_stage_compute, A.buffer_size, A.buffer) +end + +mutable struct AMD_TriangularOperator{T} <: AbstractTriangularOperator{T} + type::Type{T} + m::Int + n::Int + nrhs::Int + transa::Char + descA::rocSPARSE.ROCSparseMatrixDescriptor + buffer_size::Ref{Csize_t} + buffer::ROCVector{UInt8} +end + +eltype(A::AMD_TriangularOperator{T}) where T = T +size(A::AMD_TriangularOperator) = (A.m, A.n) + +for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat), + (:(ROCSparseMatrixCOO{T}), :BlasFloat)) + @eval begin + function KP.TriangularOperator(A::$SparseMatrixType, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N') where T <: $BlasType + m,n = size(A) + alpha = Ref{T}(one(T)) + descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O') + rocsparse_uplo = Ref{rocSPARSE.rocsparse_fill_mode}(uplo) + rocsparse_diag = Ref{rocSPARSE.rocsparse_diag_type}(diag) + rocSPARSE.rocsparse_spmat_set_attribute(descA, rocSPARSE.rocsparse_spmat_fill_mode, rocsparse_uplo, Csize_t(sizeof(rocsparse_uplo))) + rocSPARSE.rocsparse_spmat_set_attribute(descA, rocSPARSE.rocsparse_spmat_diag_type, rocsparse_diag, Csize_t(sizeof(rocsparse_diag))) + if nrhs == 1 + descX = rocSPARSE.ROCDenseVectorDescriptor(T, n) + descY = rocSPARSE.ROCDenseVectorDescriptor(T, m) + algo = rocSPARSE.rocsparse_spsv_alg_default + buffer_size = Ref{Csize_t}() + rocSPARSE.rocsparse_spsv(rocSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo, + rocSPARSE.rocsparse_spsv_stage_buffer_size, buffer_size, C_NULL) + buffer = ROCVector{UInt8}(undef, buffer_size[]) + rocSPARSE.rocsparse_spsv(rocSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo, + rocSPARSE.rocsparse_spsv_stage_preprocess, buffer_size, buffer) + return AMD_TriangularOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer) + else + descX = rocSPARSE.ROCDenseMatrixDescriptor(T, n, nrhs) + descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs) + algo = rocSPARSE.rocsparse_spsm_alg_default + buffer_size = Ref{Csize_t}() + rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo, + rocSPARSE.rocsparse_spsm_stage_buffer_size, buffer_size, C_NULL) + buffer = ROCVector{UInt8}(undef, buffer_size[]) + rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo, + rocSPARSE.rocsparse_spsm_stage_preprocess, buffer_size, buffer) + return AMD_TriangularOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer) + end + end + + function KP.update!(A::AMD_TriangularOperator{T}, B::$SparseMatrixType) where T <: $BlasFloat + (B isa ROCSparseMatrixCOO) && rocSPARSE.rocsparse_coo_set_pointers(A.descA, B.rowInd, B.colInd, B.nzVal) + (B isa ROCSparseMatrixCSR) && rocSPARSE.rocsparse_csr_set_pointers(A.descA, B.rowPtr, B.colVal, B.nzVal) + return A + end + end +end + +function LinearAlgebra.ldiv!(y::ROCVector{T}, A::AMD_TriangularOperator{T}, x::ROCVector{T}) where T <: BlasFloat + (length(y) != A.m) && throw(DimensionMismatch("length(y) != A.m")) + (length(x) != A.n) && throw(DimensionMismatch("length(x) != A.n")) + (A.nrhs == 1) || throw(DimensionMismatch("A.nrhs != 1")) + descY = rocSPARSE.ROCDenseVectorDescriptor(y) + descX = rocSPARSE.ROCDenseVectorDescriptor(x) + algo = rocSPARSE.rocsparse_spsv_alg_default + alpha = Ref{T}(one(T)) + rocSPARSE.rocsparse_spsv(rocSPARSE.handle(), A.transa, alpha, A.descA, descX, descY, T, + algo, rocSPARSE.rocsparse_spsv_stage_compute, A.buffer_size, A.buffer) +end + +function LinearAlgebra.ldiv!(Y::ROCMatrix{T}, A::AMD_TriangularOperator{T}, X::ROCMatrix{T}) where T <: BlasFloat + mY, nY = size(Y) + mX, nX = size(X) + (mY != A.m) && throw(DimensionMismatch("mY != A.m")) + (mX != A.n) && throw(DimensionMismatch("mX != A.n")) + (nY == nX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs")) + descY = rocSPARSE.ROCDenseMatrixDescriptor(Y) + descX = rocSPARSE.ROCDenseMatrixDescriptor(X) + algo = rocSPARSE.rocsparse_spsm_alg_default + alpha = Ref{T}(one(T)) + rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX, descY, T, + algo, rocSPARSE.rocsparse_spsm_stage_compute, A.buffer_size, A.buffer) end diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl index ca26d3c..4cc37b4 100644 --- a/ext/CUDA/operators.jl +++ b/ext/CUDA/operators.jl @@ -17,10 +17,10 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat), @eval begin function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType m,n = size(A) + alpha = Ref{T}(one(T)) + beta = Ref{T}(zero(T)) + descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O') if nrhs == 1 - alpha = Ref{T}(one(T)) - beta = Ref{T}(zero(T)) - descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O') descX = CUSPARSE.CuDenseVectorDescriptor(T, n) descY = CUSPARSE.CuDenseVectorDescriptor(T, m) algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT @@ -29,18 +29,14 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat), buffer = CuVector{UInt8}(undef, buffer_size[]) return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer) else - alpha = Ref{T}(one(T)) - beta = Ref{T}(zero(T)) - descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O') descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs) descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs) algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT buffer_size = Ref{Csize_t}() - transb = 'N' - CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size) + CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, beta, descY, T, algo, buffer_size) buffer = CuVector{UInt8}(undef, buffer_size[]) if !(A isa CuSparseMatrixCOO) - CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer) + CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, beta, descY, T, algo, buffer) end return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer) end @@ -61,7 +57,9 @@ function LinearAlgebra.mul!(y::CuVector{T}, A::NVIDIA_KrylovOperator{T}, x::CuVe descY = CUSPARSE.CuDenseVectorDescriptor(y) descX = CUSPARSE.CuDenseVectorDescriptor(x) algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT - CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer) + alpha = Ref{T}(one(T)) + beta = Ref{T}(zero(T)) + CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, alpha, A.descA, descX, beta, descY, T, algo, A.buffer) end function LinearAlgebra.mul!(Y::CuMatrix{T}, A::NVIDIA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat @@ -73,5 +71,93 @@ function LinearAlgebra.mul!(Y::CuMatrix{T}, A::NVIDIA_KrylovOperator{T}, X::CuMa descY = CUSPARSE.CuDenseMatrixDescriptor(Y) descX = CUSPARSE.CuDenseMatrixDescriptor(X) algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT - CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer) + alpha = Ref{T}(one(T)) + beta = Ref{T}(zero(T)) + CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX, beta, descY, T, algo, A.buffer) +end + +mutable struct NVIDIA_TriangularOperator{T,S} <: AbstractTriangularOperator{T} + type::Type{T} + m::Int + n::Int + nrhs::Int + transa::Char + descA::CUSPARSE.CuSparseMatrixDescriptor + descT::S + buffer::CuVector{UInt8} +end + +eltype(A::NVIDIA_TriangularOperator{T}) where T = T +size(A::NVIDIA_TriangularOperator) = (A.m, A.n) + +for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat), + (:(CuSparseMatrixCOO{T}), :BlasFloat)) + @eval begin + function KP.TriangularOperator(A::$SparseMatrixType, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N') where T <: $BlasType + m,n = size(A) + alpha = Ref{T}(one(T)) + descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O') + cusparse_uplo = Ref{CUSPARSE.cusparseFillMode_t}(uplo) + cusparse_diag = Ref{CUSPARSE.cusparseDiagType_t}(diag) + CUSPARSE.cusparseSpMatSetAttribute(descA, 'F', cusparse_uplo, Csize_t(sizeof(cusparse_uplo))) + CUSPARSE.cusparseSpMatSetAttribute(descA, 'D', cusparse_diag, Csize_t(sizeof(cusparse_diag))) + if nrhs == 1 + descT = CUSPARSE.CuSparseSpSVDescriptor() + descX = CUSPARSE.CuDenseVectorDescriptor(T, n) + descY = CUSPARSE.CuDenseVectorDescriptor(T, m) + algo = CUSPARSE.CUSPARSE_SPSV_ALG_DEFAULT + buffer_size = Ref{Csize_t}() + CUSPARSE.cusparseSpSV_bufferSize(CUSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo, descT, buffer_size) + buffer = CuVector{UInt8}(undef, buffer_size[]) + CUSPARSE.cusparseSpSV_analysis(CUSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo, descT, buffer) + return NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSVDescriptor}(T, m, n, nrhs, transa, descA, descT, buffer) + else + descT = CUSPARSE.CuSparseSpSMDescriptor() + descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs) + descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs) + algo = CUSPARSE.CUSPARSE_SPSM_ALG_DEFAULT + buffer_size = Ref{Csize_t}() + CUSPARSE.cusparseSpSM_bufferSize(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo, descT, buffer_size) + buffer = CuVector{UInt8}(undef, buffer_size[]) + CUSPARSE.cusparseSpSM_analysis(CUSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo, descT, buffer) + return NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSMDescriptor}(T, m, n, nrhs, transa, descA, descT, buffer) + end + end + + function KP.update!(A::NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSVDescriptor}, B::$SparseMatrixType) where T <: $BlasFloat + CUSPARSE.version() ≥ v"12.2" || error("This operation is only support by CUDA ≥ v12.3") + descB = CUSPARSE.CuSparseMatrixDescriptor(B, 'O') + A.descA = descB + CUSPARSE.cusparseSpSV_updateMatrix(CUSPARSE.handle(), A.descT, B.nzVal, 'G') + return A + end + + function KP.update!(A::NVIDIA_TriangularOperator{T,CUSPARSE.CuSparseSpSMDescriptor}, B::$SparseMatrixType) where T <: $BlasFloat + return error("This operation will be supported in CUDA v12.4") + end + end +end + +function LinearAlgebra.ldiv!(y::CuVector{T}, A::NVIDIA_TriangularOperator{T}, x::CuVector{T}) where T <: BlasFloat + (length(y) != A.m) && throw(DimensionMismatch("length(y) != A.m")) + (length(x) != A.n) && throw(DimensionMismatch("length(x) != A.n")) + (A.nrhs == 1) || throw(DimensionMismatch("A.nrhs != 1")) + descY = CUSPARSE.CuDenseVectorDescriptor(y) + descX = CUSPARSE.CuDenseVectorDescriptor(x) + algo = CUSPARSE.CUSPARSE_SPSV_ALG_DEFAULT + alpha = Ref{T}(one(T)) + CUSPARSE.cusparseSpSV_solve(CUSPARSE.handle(), A.transa, alpha, A.descA, descX, descY, T, algo, A.descT) +end + +function LinearAlgebra.ldiv!(Y::CuMatrix{T}, A::NVIDIA_TriangularOperator{T}, X::CuMatrix{T}) where T <: BlasFloat + mY, nY = size(Y) + mX, nX = size(X) + (mY != A.m) && throw(DimensionMismatch("mY != A.m")) + (mX != A.n) && throw(DimensionMismatch("mX != A.n")) + (nY == nX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs")) + descY = CUSPARSE.CuDenseMatrixDescriptor(Y) + descX = CUSPARSE.CuDenseMatrixDescriptor(X) + algo = CUSPARSE.CUSPARSE_SPSM_ALG_DEFAULT + alpha = Ref{T}(one(T)) + CUSPARSE.cusparseSpSM_solve(CUSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX, descY, T, algo, A.descT) end diff --git a/src/KrylovPreconditioners.jl b/src/KrylovPreconditioners.jl index 4d86778..7497c80 100644 --- a/src/KrylovPreconditioners.jl +++ b/src/KrylovPreconditioners.jl @@ -15,6 +15,9 @@ export AbstractKrylovPreconditioner abstract type AbstractKrylovOperator{T} end export AbstractKrylovOperator +abstract type AbstractTriangularOperator{T} end +export AbstractTriangularOperator + update!(p::AbstractKrylovPreconditioner, A::SparseMatrixCSC) = error("update!() for $(typeof(p)) is not implemented.") update!(p::AbstractKrylovPreconditioner, A) = error("update!() for $(typeof(p)) is not implemented.") update!(p::AbstractKrylovOperator, A::SparseMatrixCSC) = error("update!() for $(typeof(p)) is not implemented.") @@ -33,6 +36,9 @@ end function KrylovOperator end export KrylovOperator +function TriangularOperator end +export TriangularOperator + # Preconditioners include("ic0.jl") include("ilu0.jl") diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl index 63ac3fd..d967386 100644 --- a/test/gpu/amd.jl +++ b/test/gpu/amd.jl @@ -40,6 +40,15 @@ include("gpu.jl") end end + @testset "TriangularOperator" begin + @testset "ROCSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64) + test_triangular(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCOO{FC}) + end + @testset "ROCSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64) + test_triangular(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCSR{FC}) + end + end + @testset "Block Jacobi preconditioner" begin test_block_jacobi(ROCBackend(), ROCArray, ROCSparseMatrixCSR) end diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl index 4784183..498f913 100644 --- a/test/gpu/gpu.jl +++ b/test/gpu/gpu.jl @@ -128,6 +128,71 @@ function test_operator(FC, V, DM, SM) end end +function test_triangular(FC, V, DM, SM) + n = 100 + for (uplo, diag, triangle) in [('L', 'U', UnitLowerTriangular), + ('L', 'N', LowerTriangular ), + ('U', 'U', UnitUpperTriangular), + ('U', 'N', UpperTriangular )] + A_cpu = rand(FC, n, n) + A_cpu = uplo == 'L' ? tril(A_cpu) : triu(A_cpu) + A_cpu = diag == 'U' ? A_cpu - Diagonal(A_cpu) + I : A_cpu + A_cpu = sparse(A_cpu) + b_cpu = rand(FC, n) + + A_gpu = SM(A_cpu) + b_gpu = V(b_cpu) + opA_gpu = TriangularOperator(A_gpu, uplo, diag) + for i = 1:5 + y_cpu = rand(FC, n) + x_cpu = rand(FC, n) + ldiv!(y_cpu, triangle(A_cpu), x_cpu) + y_gpu = V(y_cpu) + x_gpu = V(x_cpu) + ldiv!(y_gpu, opA_gpu, x_gpu) + @test collect(y_gpu) ≈ y_cpu + end + for j = 1:5 + y_cpu = rand(FC, n) + x_cpu = rand(FC, n) + A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1) + ldiv!(y_cpu, triangle(A_cpu2), x_cpu) + y_gpu = V(y_cpu) + x_gpu = V(x_cpu) + A_gpu2 = SM(A_cpu2) + update!(opA_gpu, A_gpu2) + ldiv!(y_gpu, opA_gpu, x_gpu) + @test collect(y_gpu) ≈ y_cpu + end + + nrhs = 3 + opA_gpu = TriangularOperator(A_gpu, uplo, diag; nrhs) + for i = 1:5 + Y_cpu = rand(FC, n, nrhs) + X_cpu = rand(FC, n, nrhs) + ldiv!(Y_cpu, triangle(A_cpu), X_cpu) + Y_gpu = DM(Y_cpu) + X_gpu = DM(X_cpu) + ldiv!(Y_gpu, opA_gpu, X_gpu) + @test collect(Y_gpu) ≈ Y_cpu + end + if V.body.name.name != :CuArray + for j = 1:5 + Y_cpu = rand(FC, n, nrhs) + X_cpu = rand(FC, n, nrhs) + A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1) + ldiv!(Y_cpu, triangle(A_cpu2), X_cpu) + Y_gpu = DM(Y_cpu) + X_gpu = DM(X_cpu) + A_gpu2 = SM(A_cpu2) + update!(opA_gpu, A_gpu2) + ldiv!(Y_gpu, opA_gpu, X_gpu) + @test collect(Y_gpu) ≈ Y_cpu + end + end + end +end + _get_type(J::SparseMatrixCSC) = Vector{Float64} function generate_random_system(n::Int, m::Int) diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl index 487f017..6d7c0a5 100644 --- a/test/gpu/nvidia.jl +++ b/test/gpu/nvidia.jl @@ -40,6 +40,15 @@ include("gpu.jl") end end + @testset "TriangularOperator" begin + @testset "CuSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64) + test_triangular(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCOO{FC}) + end + @testset "CuSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64) + test_triangular(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCSR{FC}) + end + end + @testset "Block Jacobi preconditioner" begin test_block_jacobi(CUDABackend(), CuArray, CuSparseMatrixCSR) end