Skip to content

Commit

Permalink
Update IC(0) and ILU(0) for CUDA GPUs (#30)
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison authored Nov 30, 2023
1 parent 9745f5f commit 80c493b
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 38 deletions.
10 changes: 5 additions & 5 deletions ext/AMDGPU/ic0.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T,Cint}), :BlasFloat)
@eval begin
function KP.kp_ic0(A::$SparseMatrixType) where T <: $BlasType
P = rocSPARSE.ic0(A, 'O')
return AMD_IC0(P,0.0)
return AMD_IC0(P, 0.0)
end

function KP.update!(p::AMD_IC0{$SparseMatrixType}, A::$SparseMatrixType) where T <: $BlasType
p.P = rocSPARSE.ic0(A, 'O')
end
end
end
Expand Down Expand Up @@ -44,7 +48,3 @@ for ArrayType in (:(ROCVector{T}), :(ROCMatrix{T}))
end
end
end

function KP.update!(p::AMD_IC0, A)
p.P = rocSPARSE.ic0(A, 'O')
end
10 changes: 5 additions & 5 deletions ext/AMDGPU/ilu0.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T,Cint}), :BlasFloat)
@eval begin
function KP.kp_ilu0(A::$SparseMatrixType) where T <: $BlasType
P = rocSPARSE.ilu0(A, 'O')
return AMD_ILU0(P,0.0)
return AMD_ILU0(P, 0.0)
end

function KP.update!(p::AMD_ILU0{$SparseMatrixType}, A::$SparseMatrixType) where T <: $BlasType
p.P = rocSPARSE.ilu0(A, 'O')
end
end
end
Expand Down Expand Up @@ -44,7 +48,3 @@ for ArrayType in (:(ROCVector{T}), :(ROCMatrix{T}))
end
end
end

function KP.update!(p::AMD_ILU0, A)
p.P = rocSPARSE.ilu0(A, 'O')
end
77 changes: 65 additions & 12 deletions ext/CUDA/ic0.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,73 @@
mutable struct IC0Info
info::CUSPARSE.csric02Info_t

function IC0Info()
info_ref = Ref{CUSPARSE.csric02Info_t}()
CUSPARSE.cusparseCreateCsric02Info(info_ref)
obj = new(info_ref[])
finalizer(CUSPARSE.cusparseDestroyCsric02Info, obj)
obj
end
end

unsafe_convert(::Type{CUSPARSE.csric02Info_t}, info::IC0Info) = info.info

mutable struct NVIDIA_IC0{SM} <: AbstractKrylovPreconditioner
P::SM
n::Int
desc::CUSPARSE.CuMatrixDescriptor
buffer::CuVector{UInt8}
info::IC0Info
timer_update::Float64
P::SM
end

for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
(:(CuSparseMatrixCSC{T,Cint}), :BlasReal))
for (bname, aname, sname, T) in ((:cusparseScsric02_bufferSize, :cusparseScsric02_analysis, :cusparseScsric02, :Float32),
(:cusparseDcsric02_bufferSize, :cusparseDcsric02_analysis, :cusparseDcsric02, :Float64),
(:cusparseCcsric02_bufferSize, :cusparseCcsric02_analysis, :cusparseCcsric02, :ComplexF32),
(:cusparseZcsric02_bufferSize, :cusparseZcsric02_analysis, :cusparseZcsric02, :ComplexF64))
@eval begin
function KP.kp_ic0(A::$SparseMatrixType) where T <: $BlasType
P = CUSPARSE.ic02(A)
n = checksquare(A)
return NVIDIA_IC0(P,0.0)
function KP.kp_ic0(A::CuSparseMatrixCSR{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = IC0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsric02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_IC0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_IC0{CuSparseMatrixCSR{$T,Cint}}, A::CuSparseMatrixCSR{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.rowPtr, p.P.colVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end

function KP.kp_ic0(A::CuSparseMatrixCSC{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = IC0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsric02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_IC0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_IC0{CuSparseMatrixCSC{$T,Cint}}, A::CuSparseMatrixCSC{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.colPtr, p.P.rowVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end
end
end
Expand Down Expand Up @@ -45,8 +103,3 @@ for ArrayType in (:(CuVector{T}), :(CuMatrix{T}))
end
end
end

function KP.update!(p::NVIDIA_IC0, A)
p.P = CUSPARSE.ic02(A)
end

76 changes: 65 additions & 11 deletions ext/CUDA/ilu0.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,73 @@
mutable struct ILU0Info
info::CUSPARSE.csrilu02Info_t

function ILU0Info()
info_ref = Ref{CUSPARSE.csrilu02Info_t}()
CUSPARSE.cusparseCreateCsrilu02Info(info_ref)
obj = new(info_ref[])
finalizer(CUSPARSE.cusparseDestroyCsrilu02Info, obj)
obj
end
end

unsafe_convert(::Type{CUSPARSE.csrilu02Info_t}, info::ILU0Info) = info.info

mutable struct NVIDIA_ILU0{SM} <: AbstractKrylovPreconditioner
P::SM
n::Int
desc::CUSPARSE.CuMatrixDescriptor
buffer::CuVector{UInt8}
info::ILU0Info
timer_update::Float64
P::SM
end

for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
(:(CuSparseMatrixCSC{T,Cint}), :BlasReal))
for (bname, aname, sname, T) in ((:cusparseScsrilu02_bufferSize, :cusparseScsrilu02_analysis, :cusparseScsrilu02, :Float32),
(:cusparseDcsrilu02_bufferSize, :cusparseDcsrilu02_analysis, :cusparseDcsrilu02, :Float64),
(:cusparseCcsrilu02_bufferSize, :cusparseCcsrilu02_analysis, :cusparseCcsrilu02, :ComplexF32),
(:cusparseZcsrilu02_bufferSize, :cusparseZcsrilu02_analysis, :cusparseZcsrilu02, :ComplexF64))
@eval begin
function KP.kp_ilu0(A::$SparseMatrixType) where T <: $BlasType
P = CUSPARSE.ilu02(A)
n = checksquare(A)
return NVIDIA_ILU0(P,0.0)
function KP.kp_ilu0(A::CuSparseMatrixCSR{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = ILU0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsrilu02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_ILU0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_ILU0{CuSparseMatrixCSR{$T,Cint}}, A::CuSparseMatrixCSR{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.rowPtr, p.P.colVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end

function KP.kp_ilu0(A::CuSparseMatrixCSC{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = ILU0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsrilu02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_ILU0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_ILU0{CuSparseMatrixCSC{$T,Cint}}, A::CuSparseMatrixCSC{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.colPtr, p.P.rowVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end
end
end
Expand Down Expand Up @@ -45,7 +103,3 @@ for ArrayType in (:(CuVector{T}), :(CuMatrix{T}))
end
end
end

function KP.update!(p::NVIDIA_ILU0, A)
p.P = CUSPARSE.ilu02(A)
end
2 changes: 1 addition & 1 deletion ext/KrylovPreconditionersAMDGPUExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using AMDGPU
using AMDGPU.rocSPARSE
using LinearAlgebra: checksquare, BlasReal, BlasFloat
import LinearAlgebra: ldiv!, mul!
import Base: size, eltype
import Base: size, eltype, unsafe_convert

using KrylovPreconditioners
const KP = KrylovPreconditioners
Expand Down
2 changes: 1 addition & 1 deletion ext/KrylovPreconditionersCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using CUDA
using CUDA.CUSPARSE
using LinearAlgebra: checksquare, BlasReal, BlasFloat
import LinearAlgebra: ldiv!, mul!
import Base: size, eltype
import Base: size, eltype, unsafe_convert

using KrylovPreconditioners
const KP = KrylovPreconditioners
Expand Down
5 changes: 4 additions & 1 deletion src/KrylovPreconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ import LinearAlgebra: ldiv!

abstract type AbstractKrylovPreconditioner end
export AbstractKrylovPreconditioner
update!(p::AbstractKrylovPreconditioner, A) = error("update!() for $(typeof(p)) not implemented")

update!(p::AbstractKrylovPreconditioner, A::SparseMatrixCSC) = error("update!() for $(typeof(p)) is not implemented.")
update!(p::AbstractKrylovPreconditioner, A) = error("update!() for $(typeof(p)) is not implemented.")

export update!, get_timer, reset_timer!

function get_timer(p::AbstractKrylovPreconditioner)
Expand Down
20 changes: 18 additions & 2 deletions test/gpu/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ function test_ic0(FC, V, M)
A_gpu = M(A_cpu)
b_gpu = V(b_cpu)
P = kp_ic0(A_gpu)
update!(P, A_gpu)

x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
Expand All @@ -24,6 +23,17 @@ function test_ic0(FC, V, M)
else
@test norm(r_gpu) 1e-8
end

A_gpu = M(A_cpu + 200*I)
update!(P, A_gpu)
x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
@test stats.niter 5
if (FC <: ComplexF64) && V.body.name.name == :ROCArray
@test_broken norm(r_gpu) 1e-6
else
@test norm(r_gpu) 1e-8
end
end

function test_ilu0(FC, V, M)
Expand All @@ -36,12 +46,18 @@ function test_ilu0(FC, V, M)
A_gpu = M(A_cpu)
b_gpu = V(b_cpu)
P = kp_ilu0(A_gpu)
update!(P, A_gpu)

x_gpu, stats = gmres(A_gpu, b_gpu, N=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
@test stats.niter 5
@test norm(r_gpu) 1e-8

A_gpu = M(A_cpu + 200*I)
update!(P, A_gpu)
x_gpu, stats = gmres(A_gpu, b_gpu, N=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
@test stats.niter 5
@test norm(r_gpu) 1e-8
end

function test_operator(FC, V, DM, SM)
Expand Down

0 comments on commit 80c493b

Please sign in to comment.