Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update IC(0) and ILU(0) for CUDA GPUs #30

Merged
merged 7 commits into from
Nov 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions ext/AMDGPU/ic0.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T,Cint}), :BlasFloat)
@eval begin
function KP.kp_ic0(A::$SparseMatrixType) where T <: $BlasType
P = rocSPARSE.ic0(A, 'O')
return AMD_IC0(P,0.0)
return AMD_IC0(P, 0.0)
end

function KP.update!(p::AMD_IC0{$SparseMatrixType}, A::$SparseMatrixType) where T <: $BlasType
p.P = rocSPARSE.ic0(A, 'O')
end
end
end
Expand Down Expand Up @@ -44,7 +48,3 @@ for ArrayType in (:(ROCVector{T}), :(ROCMatrix{T}))
end
end
end

function KP.update!(p::AMD_IC0, A)
p.P = rocSPARSE.ic0(A, 'O')
end
10 changes: 5 additions & 5 deletions ext/AMDGPU/ilu0.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T,Cint}), :BlasFloat)
@eval begin
function KP.kp_ilu0(A::$SparseMatrixType) where T <: $BlasType
P = rocSPARSE.ilu0(A, 'O')
return AMD_ILU0(P,0.0)
return AMD_ILU0(P, 0.0)
end

function KP.update!(p::AMD_ILU0{$SparseMatrixType}, A::$SparseMatrixType) where T <: $BlasType
p.P = rocSPARSE.ilu0(A, 'O')
end
end
end
Expand Down Expand Up @@ -44,7 +48,3 @@ for ArrayType in (:(ROCVector{T}), :(ROCMatrix{T}))
end
end
end

function KP.update!(p::AMD_ILU0, A)
p.P = rocSPARSE.ilu0(A, 'O')
end
77 changes: 65 additions & 12 deletions ext/CUDA/ic0.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,73 @@
mutable struct IC0Info
info::CUSPARSE.csric02Info_t

function IC0Info()
info_ref = Ref{CUSPARSE.csric02Info_t}()
CUSPARSE.cusparseCreateCsric02Info(info_ref)
obj = new(info_ref[])
finalizer(CUSPARSE.cusparseDestroyCsric02Info, obj)
obj
end
end

unsafe_convert(::Type{CUSPARSE.csric02Info_t}, info::IC0Info) = info.info

mutable struct NVIDIA_IC0{SM} <: AbstractKrylovPreconditioner
P::SM
n::Int
desc::CUSPARSE.CuMatrixDescriptor
buffer::CuVector{UInt8}
info::IC0Info
timer_update::Float64
P::SM
end

for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
(:(CuSparseMatrixCSC{T,Cint}), :BlasReal))
for (bname, aname, sname, T) in ((:cusparseScsric02_bufferSize, :cusparseScsric02_analysis, :cusparseScsric02, :Float32),
(:cusparseDcsric02_bufferSize, :cusparseDcsric02_analysis, :cusparseDcsric02, :Float64),
(:cusparseCcsric02_bufferSize, :cusparseCcsric02_analysis, :cusparseCcsric02, :ComplexF32),
(:cusparseZcsric02_bufferSize, :cusparseZcsric02_analysis, :cusparseZcsric02, :ComplexF64))
@eval begin
function KP.kp_ic0(A::$SparseMatrixType) where T <: $BlasType
P = CUSPARSE.ic02(A)
n = checksquare(A)
return NVIDIA_IC0(P,0.0)
function KP.kp_ic0(A::CuSparseMatrixCSR{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = IC0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsric02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] ≥ 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_IC0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_IC0{CuSparseMatrixCSR{$T,Cint}}, A::CuSparseMatrixCSR{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.rowPtr, p.P.colVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end

function KP.kp_ic0(A::CuSparseMatrixCSC{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = IC0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsric02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] ≥ 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_IC0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_IC0{CuSparseMatrixCSC{$T,Cint}}, A::CuSparseMatrixCSC{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.colPtr, p.P.rowVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end
end
end
Expand Down Expand Up @@ -45,8 +103,3 @@ for ArrayType in (:(CuVector{T}), :(CuMatrix{T}))
end
end
end

function KP.update!(p::NVIDIA_IC0, A)
p.P = CUSPARSE.ic02(A)
end

76 changes: 65 additions & 11 deletions ext/CUDA/ilu0.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,73 @@
mutable struct ILU0Info
info::CUSPARSE.csrilu02Info_t

function ILU0Info()
info_ref = Ref{CUSPARSE.csrilu02Info_t}()
CUSPARSE.cusparseCreateCsrilu02Info(info_ref)
obj = new(info_ref[])
finalizer(CUSPARSE.cusparseDestroyCsrilu02Info, obj)
obj
end
end

unsafe_convert(::Type{CUSPARSE.csrilu02Info_t}, info::ILU0Info) = info.info

mutable struct NVIDIA_ILU0{SM} <: AbstractKrylovPreconditioner
P::SM
n::Int
desc::CUSPARSE.CuMatrixDescriptor
buffer::CuVector{UInt8}
info::ILU0Info
timer_update::Float64
P::SM
end

for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
(:(CuSparseMatrixCSC{T,Cint}), :BlasReal))
for (bname, aname, sname, T) in ((:cusparseScsrilu02_bufferSize, :cusparseScsrilu02_analysis, :cusparseScsrilu02, :Float32),
(:cusparseDcsrilu02_bufferSize, :cusparseDcsrilu02_analysis, :cusparseDcsrilu02, :Float64),
(:cusparseCcsrilu02_bufferSize, :cusparseCcsrilu02_analysis, :cusparseCcsrilu02, :ComplexF32),
(:cusparseZcsrilu02_bufferSize, :cusparseZcsrilu02_analysis, :cusparseZcsrilu02, :ComplexF64))
@eval begin
function KP.kp_ilu0(A::$SparseMatrixType) where T <: $BlasType
P = CUSPARSE.ilu02(A)
n = checksquare(A)
return NVIDIA_ILU0(P,0.0)
function KP.kp_ilu0(A::CuSparseMatrixCSR{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = ILU0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsrilu02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] ≥ 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.rowPtr, P.colVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_ILU0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_ILU0{CuSparseMatrixCSR{$T,Cint}}, A::CuSparseMatrixCSR{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.rowPtr, p.P.colVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end

function KP.kp_ilu0(A::CuSparseMatrixCSC{$T,Cint})
P = copy(A)
n = checksquare(P)
desc = CUSPARSE.CuMatrixDescriptor('G', 'L', 'N', 'O')
info = ILU0Info()
buffer_size = Ref{Cint}()
CUSPARSE.$bname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, buffer_size)
buffer = CuVector{UInt8}(undef, buffer_size[])
CUSPARSE.$aname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
posit = Ref{Cint}(1)
CUSPARSE.cusparseXcsrilu02_zeroPivot(CUSPARSE.handle(), info, posit)
(posit[] ≥ 0) && error("Structural/numerical zero in A at ($(posit[]),$(posit[])))")
CUSPARSE.$sname(CUSPARSE.handle(), n, nnz(P), desc, P.nzVal, P.colPtr, P.rowVal, info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, buffer)
return NVIDIA_ILU0(n, desc, buffer, info, 0.0, P)
end

function KP.update!(p::NVIDIA_ILU0{CuSparseMatrixCSC{$T,Cint}}, A::CuSparseMatrixCSC{$T,Cint})
copyto!(p.P.nzVal, A.nzVal)
CUSPARSE.$sname(CUSPARSE.handle(), p.n, nnz(p.P), p.desc, p.P.nzVal, p.P.colPtr, p.P.rowVal, p.info, CUSPARSE.CUSPARSE_SOLVE_POLICY_USE_LEVEL, p.buffer)
return p
end
end
end
Expand Down Expand Up @@ -45,7 +103,3 @@ for ArrayType in (:(CuVector{T}), :(CuMatrix{T}))
end
end
end

function KP.update!(p::NVIDIA_ILU0, A)
p.P = CUSPARSE.ilu02(A)
end
2 changes: 1 addition & 1 deletion ext/KrylovPreconditionersAMDGPUExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using AMDGPU
using AMDGPU.rocSPARSE
using LinearAlgebra: checksquare, BlasReal, BlasFloat
import LinearAlgebra: ldiv!, mul!
import Base: size, eltype
import Base: size, eltype, unsafe_convert

using KrylovPreconditioners
const KP = KrylovPreconditioners
Expand Down
2 changes: 1 addition & 1 deletion ext/KrylovPreconditionersCUDAExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using CUDA
using CUDA.CUSPARSE
using LinearAlgebra: checksquare, BlasReal, BlasFloat
import LinearAlgebra: ldiv!, mul!
import Base: size, eltype
import Base: size, eltype, unsafe_convert

using KrylovPreconditioners
const KP = KrylovPreconditioners
Expand Down
5 changes: 4 additions & 1 deletion src/KrylovPreconditioners.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ import LinearAlgebra: ldiv!

abstract type AbstractKrylovPreconditioner end
export AbstractKrylovPreconditioner
update!(p::AbstractKrylovPreconditioner, A) = error("update!() for $(typeof(p)) not implemented")

update!(p::AbstractKrylovPreconditioner, A::SparseMatrixCSC) = error("update!() for $(typeof(p)) is not implemented.")
update!(p::AbstractKrylovPreconditioner, A) = error("update!() for $(typeof(p)) is not implemented.")

export update!, get_timer, reset_timer!

function get_timer(p::AbstractKrylovPreconditioner)
Expand Down
20 changes: 18 additions & 2 deletions test/gpu/gpu.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ function test_ic0(FC, V, M)
A_gpu = M(A_cpu)
b_gpu = V(b_cpu)
P = kp_ic0(A_gpu)
update!(P, A_gpu)

x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
Expand All @@ -24,6 +23,17 @@ function test_ic0(FC, V, M)
else
@test norm(r_gpu) ≤ 1e-8
end

A_gpu = M(A_cpu + 200*I)
update!(P, A_gpu)
x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
@test stats.niter ≤ 5
if (FC <: ComplexF64) && V.body.name.name == :ROCArray
@test_broken norm(r_gpu) ≤ 1e-6
else
@test norm(r_gpu) ≤ 1e-8
end
end

function test_ilu0(FC, V, M)
Expand All @@ -36,12 +46,18 @@ function test_ilu0(FC, V, M)
A_gpu = M(A_cpu)
b_gpu = V(b_cpu)
P = kp_ilu0(A_gpu)
update!(P, A_gpu)

x_gpu, stats = gmres(A_gpu, b_gpu, N=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
@test stats.niter ≤ 5
@test norm(r_gpu) ≤ 1e-8

A_gpu = M(A_cpu + 200*I)
update!(P, A_gpu)
x_gpu, stats = gmres(A_gpu, b_gpu, N=P, ldiv=true)
r_gpu = b_gpu - A_gpu * x_gpu
@test stats.niter ≤ 5
@test norm(r_gpu) ≤ 1e-8
end

function test_operator(FC, V, DM, SM)
Expand Down
Loading