From 3d467785e7aeb98ae834fa20685f29ef1541cf8b Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 22:13:59 -0600
Subject: [PATCH 01/27] Add a KrylovOperator for sparse matrices on NVIDIA GPUs

---
 ext/CUDA/operators.jl               | 58 +++++++++++++++++++++++++++++
 ext/KrylovPreconditionersCUDAExt.jl |  4 +-
 src/KrylovPreconditioners.jl        |  3 ++
 test/gpu/gpu.jl                     | 17 +++++++++
 test/gpu/nvidia.jl                  | 12 ++++++
 5 files changed, 92 insertions(+), 2 deletions(-)
 create mode 100644 ext/CUDA/operators.jl

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
new file mode 100644
index 0000000..b56ab53
--- /dev/null
+++ b/ext/CUDA/operators.jl
@@ -0,0 +1,58 @@
+mutable struct CUDA_KrylovOperator{T}
+    m::Int
+    n::Int
+    nrhs::Int
+    type::T
+    transa::Char
+    descA::CUSPARSE.CuSparseMatrixDescriptor
+    buffer::CuVector{UInt8}
+end
+
+eltype(A::CUDA_KrylovOperator{T}) where T = T
+size(A::CUDA_KrylovOperator) = (m, n)
+
+for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
+                                     (:(CuSparseMatrixCSC{T,Cint}), :BlasFloat)
+  @eval begin
+    function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
+        m,n = size(A)
+        if nrhs == 1
+            alpha = Ref{T}(one(T))
+            beta = Ref{T}(zero(T))
+            descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
+            descX = CUSPARSE.CuDenseVectorDescriptor(T, n)
+            descY = CUSPARSE.CuDenseVectorDescriptor(T, m)
+            algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
+            buffer_size = Ref{Csize_t}()
+            CUSPARSE.cusparseSpMV_bufferSize(CUDA.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
+            buffer = CuVector{UInt8}(undef, buffer_size)
+            return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+        else
+            alpha = Ref{T}(one(T))
+            beta = Ref{T}(zeto(T))
+            descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
+            descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs)
+            descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs)
+            algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
+            buffer_size = Ref{Csize_t}()
+            transb = 'N'
+            CUSPARSE.cusparseSpMM_bufferSize(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
+            buffer = CuVector{UInt8}(undef, buffer_size)
+            CUSPARSE.cusparseSpMM_preprocess(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
+            return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+        end
+    end
+
+    function LinearAlgebra.mul!(y::CuVector{T}, A::KrylovOperator{T}, x::CuVector{T}) where T<: BlasFloat
+        descY = CUSPARSE.CuDenseVectorDescriptor(y)
+        descX = CUSPARSE.CuDenseVectorDescriptor(x)
+        CUSPARSE.cusparseSpMV(CUDA.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+    end
+
+    function LinearAlgebra.mul!(Y::CuMatrix{T}, A::KrylovOperator{T}, X::CuMatrix{T}) where T<: BlasFloat
+        descY = CUSPARSE.CuDenseVectorDescriptor(y)
+        descX = CUSPARSE.CuDenseVectorDescriptor(x)
+        CUSPARSE.cusparseSpMM(CUDA.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+    end
+  end
+end
diff --git a/ext/KrylovPreconditionersCUDAExt.jl b/ext/KrylovPreconditionersCUDAExt.jl
index c0e0f83..f6b941d 100644
--- a/ext/KrylovPreconditionersCUDAExt.jl
+++ b/ext/KrylovPreconditionersCUDAExt.jl
@@ -3,8 +3,7 @@ using LinearAlgebra
 using CUDA
 using CUDA.CUSPARSE
 using LinearAlgebra: checksquare, BlasReal, BlasFloat
-import LinearAlgebra: ldiv!
-using SparseArrays
+import LinearAlgebra: ldiv!, mul!
 
 using KrylovPreconditioners
 const KP = KrylovPreconditioners
@@ -14,5 +13,6 @@ const KA = KernelAbstractions
 include("CUDA/ic0.jl")
 include("CUDA/ilu0.jl")
 include("CUDA/blockjacobi.jl")
+include("CUDA/operators.jl")
 
 end
diff --git a/src/KrylovPreconditioners.jl b/src/KrylovPreconditioners.jl
index 8c43404..1eb3da3 100644
--- a/src/KrylovPreconditioners.jl
+++ b/src/KrylovPreconditioners.jl
@@ -12,6 +12,9 @@ import LinearAlgebra: ldiv!
 abstract type AbstractKrylovPreconditioner end
 export AbstractKrylovPreconditioner
 
+function KrylovOperator end
+export KrylovOperator
+
 # Preconditioners
 include("ic0.jl")
 include("ilu0.jl")
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 107ad94..9dbd1a8 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -42,6 +42,23 @@ function test_ilu0(FC, V, M)
   @test norm(r_gpu) ≤ 1e-8
 end
 
+function test_operator(FC, V, M)
+  n = 100
+  R = real(FC)
+  A_cpu = rand(FC, m, n)
+  A_cpu = sparse(A_cpu)
+  b_cpu = rand(FC, m)
+
+  A_gpu = M(A_cpu)
+  b_gpu = V(b_cpu)
+  opA_gpu = KrylovOperator(A_gpu)
+
+  x_gpu, stats = lsmr(opA_gpu, b_gpu)
+  r_gpu = b_gpu - A_gpu * x_gpu
+  @test stats.solved
+  @test norm(A' * r_gpu) ≤ 1e-8
+end
+
 _get_type(J::SparseMatrixCSC) = Vector{Float64}
 
 function generate_random_system(n::Int, m::Int)
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index cb155da..dbf212f 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -26,6 +26,18 @@ include("gpu.jl")
     end
   end
 
+  @testset "KrylovOperator" begin
+    @testset "CuSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, CuVector{FC}, CuSparseMatrixCOO{FC})
+    end
+    @testset "CuSparseMatrixCSC -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, CuVector{FC}, CuSparseMatrixCSC{FC})
+    end
+    @testset "CuSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, CuVector{FC}, CuSparseMatrixCSR{FC})
+    end
+  end
+
   @testset "Block Jacobi preconditioner" begin
       if CUDA.functional()
           test_block_jacobi(CUDABackend(), CuArray, CuSparseMatrixCSR)

From 44112fd70b8c5e3a3f0df6f8feff98d7074bda2a Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 22:23:16 -0600
Subject: [PATCH 02/27] Fix a typo

---
 ext/CUDA/operators.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index b56ab53..b8ea8d2 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -12,7 +12,7 @@ eltype(A::CUDA_KrylovOperator{T}) where T = T
 size(A::CUDA_KrylovOperator) = (m, n)
 
 for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
-                                     (:(CuSparseMatrixCSC{T,Cint}), :BlasFloat)
+                                     (:(CuSparseMatrixCSC{T,Cint}), :BlasFloat))
   @eval begin
     function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
         m,n = size(A)

From 3f6d0907938c4671a7c8289326589ba578dc402c Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 22:43:24 -0600
Subject: [PATCH 03/27] Fix a typo

---
 ext/CUDA/operators.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index b8ea8d2..ada9b1f 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -43,13 +43,13 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
         end
     end
 
-    function LinearAlgebra.mul!(y::CuVector{T}, A::KrylovOperator{T}, x::CuVector{T}) where T<: BlasFloat
+    function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVector{T}) where T<: BlasFloat
         descY = CUSPARSE.CuDenseVectorDescriptor(y)
         descX = CUSPARSE.CuDenseVectorDescriptor(x)
         CUSPARSE.cusparseSpMV(CUDA.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
     end
 
-    function LinearAlgebra.mul!(Y::CuMatrix{T}, A::KrylovOperator{T}, X::CuMatrix{T}) where T<: BlasFloat
+    function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T<: BlasFloat
         descY = CUSPARSE.CuDenseVectorDescriptor(y)
         descX = CUSPARSE.CuDenseVectorDescriptor(x)
         CUSPARSE.cusparseSpMM(CUDA.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)

From 213eb68bee9489eb5099a8c37912045dc99f6edb Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 22:49:56 -0600
Subject: [PATCH 04/27] Fix a typo

---
 ext/CUDA/operators.jl | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index ada9b1f..de3ef23 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -42,17 +42,24 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
             return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
         end
     end
+end
 
-    function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVector{T}) where T<: BlasFloat
-        descY = CUSPARSE.CuDenseVectorDescriptor(y)
-        descX = CUSPARSE.CuDenseVectorDescriptor(x)
-        CUSPARSE.cusparseSpMV(CUDA.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
-    end
+function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVector{T}) where T <: BlasFloat
+    (length(y) != A.m) && throw(DimensionMismatch(""))
+    (length(x) != A.n) && throw(DimensionMismatch(""))
+    (A.nrhs != 1) && throw(DimensionMismatch(""))
+    descY = CUSPARSE.CuDenseVectorDescriptor(y)
+    descX = CUSPARSE.CuDenseVectorDescriptor(x)
+    CUSPARSE.cusparseSpMV(CUDA.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+end
 
-    function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T<: BlasFloat
-        descY = CUSPARSE.CuDenseVectorDescriptor(y)
-        descX = CUSPARSE.CuDenseVectorDescriptor(x)
-        CUSPARSE.cusparseSpMM(CUDA.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
-    end
-  end
+function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
+    mY, nY = size(Y)
+    mX, nX = size(X)
+    (mY != A.m) && throw(DimensionMismatch(""))
+    (mX != A.n) && throw(DimensionMismatch(""))
+    (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
+    descY = CUSPARSE.CuDenseVectorDescriptor(Y)
+    descX = CUSPARSE.CuDenseVectorDescriptor(X)
+    CUSPARSE.cusparseSpMM(CUDA.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
 end

From c8b74e4f4846ee8f5920542a77edafe1976e5171 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 23:08:18 -0600
Subject: [PATCH 05/27] More fixes

---
 ext/CUDA/operators.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index de3ef23..f9e486c 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -9,10 +9,10 @@ mutable struct CUDA_KrylovOperator{T}
 end
 
 eltype(A::CUDA_KrylovOperator{T}) where T = T
-size(A::CUDA_KrylovOperator) = (m, n)
+size(A::CUDA_KrylovOperator) = (A.m, A.n)
 
-for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T,Cint}), :BlasFloat),
-                                     (:(CuSparseMatrixCSC{T,Cint}), :BlasFloat))
+for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
+                                     (:(CuSparseMatrixCSC{T}), :BlasFloat))
   @eval begin
     function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
         m,n = size(A)

From 38e66581ab42f92431dcf42b0b6dfc527d3db949 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 23:16:19 -0600
Subject: [PATCH 06/27] More fixes

---
 ext/CUDA/operators.jl | 55 ++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index f9e486c..646d907 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -13,33 +13,34 @@ size(A::CUDA_KrylovOperator) = (A.m, A.n)
 
 for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                                      (:(CuSparseMatrixCSC{T}), :BlasFloat))
-  @eval begin
-    function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
-        m,n = size(A)
-        if nrhs == 1
-            alpha = Ref{T}(one(T))
-            beta = Ref{T}(zero(T))
-            descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
-            descX = CUSPARSE.CuDenseVectorDescriptor(T, n)
-            descY = CUSPARSE.CuDenseVectorDescriptor(T, m)
-            algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
-            buffer_size = Ref{Csize_t}()
-            CUSPARSE.cusparseSpMV_bufferSize(CUDA.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
-            buffer = CuVector{UInt8}(undef, buffer_size)
-            return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
-        else
-            alpha = Ref{T}(one(T))
-            beta = Ref{T}(zeto(T))
-            descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
-            descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs)
-            descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs)
-            algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
-            buffer_size = Ref{Csize_t}()
-            transb = 'N'
-            CUSPARSE.cusparseSpMM_bufferSize(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
-            buffer = CuVector{UInt8}(undef, buffer_size)
-            CUSPARSE.cusparseSpMM_preprocess(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
-            return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+    @eval begin
+        function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
+            m,n = size(A)
+            if nrhs == 1
+                alpha = Ref{T}(one(T))
+                beta = Ref{T}(zero(T))
+                descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
+                descX = CUSPARSE.CuDenseVectorDescriptor(T, n)
+                descY = CUSPARSE.CuDenseVectorDescriptor(T, m)
+                algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
+                buffer_size = Ref{Csize_t}()
+                CUSPARSE.cusparseSpMV_bufferSize(CUDA.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
+                buffer = CuVector{UInt8}(undef, buffer_size)
+                return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+            else
+                alpha = Ref{T}(one(T))
+                beta = Ref{T}(zeto(T))
+                descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
+                descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs)
+                descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs)
+                algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
+                buffer_size = Ref{Csize_t}()
+                transb = 'N'
+                CUSPARSE.cusparseSpMM_bufferSize(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
+                buffer = CuVector{UInt8}(undef, buffer_size)
+                CUSPARSE.cusparseSpMM_preprocess(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
+                return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+            end
         end
     end
 end

From 23acdcb6a3dc9a6e0217cb792106f68922812596 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 23:36:42 -0600
Subject: [PATCH 07/27] More fixes

---
 ext/CUDA/operators.jl | 4 ++--
 test/gpu/gpu.jl       | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 646d907..af3824a 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -26,7 +26,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 buffer_size = Ref{Csize_t}()
                 CUSPARSE.cusparseSpMV_bufferSize(CUDA.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size)
-                return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+                return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
             else
                 alpha = Ref{T}(one(T))
                 beta = Ref{T}(zeto(T))
@@ -39,7 +39,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 CUSPARSE.cusparseSpMM_bufferSize(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size)
                 CUSPARSE.cusparseSpMM_preprocess(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
-                return CUDA_KrylovOperator{T}(m, n, rhs, transa, descA, buffer)
+                return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
             end
         end
     end
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 9dbd1a8..54d128f 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -43,7 +43,8 @@ function test_ilu0(FC, V, M)
 end
 
 function test_operator(FC, V, M)
-  n = 100
+  m = 100
+  n = 50
   R = real(FC)
   A_cpu = rand(FC, m, n)
   A_cpu = sparse(A_cpu)

From dee3385b2d9e8b44400895538bec673f4328c057 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Mon, 27 Nov 2023 23:57:42 -0600
Subject: [PATCH 08/27] More fixes

---
 ext/CUDA/operators.jl               | 3 ++-
 ext/KrylovPreconditionersCUDAExt.jl | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index af3824a..7fb8258 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -12,7 +12,8 @@ eltype(A::CUDA_KrylovOperator{T}) where T = T
 size(A::CUDA_KrylovOperator) = (A.m, A.n)
 
 for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
-                                     (:(CuSparseMatrixCSC{T}), :BlasFloat))
+                                     (:(CuSparseMatrixCSC{T}), :BlasFloat),
+                                     (:(CuSparseMatrixCOO{T}), :BlasFloat)))
     @eval begin
         function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
             m,n = size(A)
diff --git a/ext/KrylovPreconditionersCUDAExt.jl b/ext/KrylovPreconditionersCUDAExt.jl
index f6b941d..a74f941 100644
--- a/ext/KrylovPreconditionersCUDAExt.jl
+++ b/ext/KrylovPreconditionersCUDAExt.jl
@@ -4,6 +4,7 @@ using CUDA
 using CUDA.CUSPARSE
 using LinearAlgebra: checksquare, BlasReal, BlasFloat
 import LinearAlgebra: ldiv!, mul!
+import Base: size, eltype
 
 using KrylovPreconditioners
 const KP = KrylovPreconditioners

From ab32b18172fe06c9dad4cd9b2cec9d17de8fe7c0 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:01:05 -0600
Subject: [PATCH 09/27] More fixes

---
 ext/CUDA/operators.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 7fb8258..f5fee4a 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -13,7 +13,7 @@ size(A::CUDA_KrylovOperator) = (A.m, A.n)
 
 for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                                      (:(CuSparseMatrixCSC{T}), :BlasFloat),
-                                     (:(CuSparseMatrixCOO{T}), :BlasFloat)))
+                                     (:(CuSparseMatrixCOO{T}), :BlasFloat))
     @eval begin
         function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
             m,n = size(A)

From 42e42ba7c76cf4fb0201fbaf8b077d71adaa3fda Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:06:15 -0600
Subject: [PATCH 10/27] More fixes

---
 ext/CUDA/operators.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index f5fee4a..890cad2 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -25,7 +25,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 descY = CUSPARSE.CuDenseVectorDescriptor(T, m)
                 algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
                 buffer_size = Ref{Csize_t}()
-                CUSPARSE.cusparseSpMV_bufferSize(CUDA.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
+                CUSPARSE.cusparseSpMV_bufferSize(CUSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size)
                 return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
             else
@@ -37,9 +37,9 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
                 buffer_size = Ref{Csize_t}()
                 transb = 'N'
-                CUSPARSE.cusparseSpMM_bufferSize(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
+                CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size)
-                CUSPARSE.cusparseSpMM_preprocess(CUDA.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
+                CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
                 return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
             end
         end
@@ -52,7 +52,7 @@ function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVect
     (A.nrhs != 1) && throw(DimensionMismatch(""))
     descY = CUSPARSE.CuDenseVectorDescriptor(y)
     descX = CUSPARSE.CuDenseVectorDescriptor(x)
-    CUSPARSE.cusparseSpMV(CUDA.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+    CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
 end
 
 function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
@@ -63,5 +63,5 @@ function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatr
     (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
     descY = CUSPARSE.CuDenseVectorDescriptor(Y)
     descX = CUSPARSE.CuDenseVectorDescriptor(X)
-    CUSPARSE.cusparseSpMM(CUDA.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+    CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
 end

From 4d77d658e6648d85f672745d09014402e238c078 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:09:45 -0600
Subject: [PATCH 11/27] More fixes

---
 ext/CUDA/operators.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 890cad2..296c830 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -26,7 +26,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
                 buffer_size = Ref{Csize_t}()
                 CUSPARSE.cusparseSpMV_bufferSize(CUSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
-                buffer = CuVector{UInt8}(undef, buffer_size)
+                buffer = CuVector{UInt8}(undef, buffer_size[])
                 return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
             else
                 alpha = Ref{T}(one(T))
@@ -38,7 +38,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 buffer_size = Ref{Csize_t}()
                 transb = 'N'
                 CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
-                buffer = CuVector{UInt8}(undef, buffer_size)
+                buffer = CuVector{UInt8}(undef, buffer_size[])
                 CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
                 return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
             end
@@ -61,7 +61,7 @@ function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatr
     (mY != A.m) && throw(DimensionMismatch(""))
     (mX != A.n) && throw(DimensionMismatch(""))
     (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
-    descY = CUSPARSE.CuDenseVectorDescriptor(Y)
-    descX = CUSPARSE.CuDenseVectorDescriptor(X)
+    descY = CUSPARSE.CuDenseMatrixDescriptor(Y)
+    descX = CUSPARSE.CuDenseMatrixDescriptor(X)
     CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
 end

From 3575997ed3685543044fe30f2af6c23317cfa0e3 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:14:26 -0600
Subject: [PATCH 12/27] More fixes

---
 ext/CUDA/operators.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 296c830..fda7042 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -1,8 +1,8 @@
 mutable struct CUDA_KrylovOperator{T}
+    type::Type{T}
     m::Int
     n::Int
     nrhs::Int
-    type::T
     transa::Char
     descA::CUSPARSE.CuSparseMatrixDescriptor
     buffer::CuVector{UInt8}
@@ -27,7 +27,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 buffer_size = Ref{Csize_t}()
                 CUSPARSE.cusparseSpMV_bufferSize(CUSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size[])
-                return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
+                return CUDA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
             else
                 alpha = Ref{T}(one(T))
                 beta = Ref{T}(zeto(T))
@@ -40,7 +40,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size[])
                 CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
-                return CUDA_KrylovOperator{T}(m, n, nrhs, transa, descA, buffer)
+                return CUDA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
             end
         end
     end

From 44514a3c76dd5e3cde1cfc6341dc324efdbb75b1 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:18:30 -0600
Subject: [PATCH 13/27] More fixes

---
 test/gpu/gpu.jl | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 54d128f..65453d5 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -43,21 +43,20 @@ function test_ilu0(FC, V, M)
 end
 
 function test_operator(FC, V, M)
-  m = 100
-  n = 50
+  n = 100
   R = real(FC)
-  A_cpu = rand(FC, m, n)
+  A_cpu = rand(FC, n, n)
   A_cpu = sparse(A_cpu)
-  b_cpu = rand(FC, m)
+  b_cpu = rand(FC, n)
 
   A_gpu = M(A_cpu)
   b_gpu = V(b_cpu)
   opA_gpu = KrylovOperator(A_gpu)
 
-  x_gpu, stats = lsmr(opA_gpu, b_gpu)
+  x_gpu, stats = gmres(opA_gpu, b_gpu)
   r_gpu = b_gpu - A_gpu * x_gpu
   @test stats.solved
-  @test norm(A' * r_gpu) ≤ 1e-8
+  @test norm(r_gpu) ≤ 1e-8
 end
 
 _get_type(J::SparseMatrixCSC) = Vector{Float64}

From faf75e9fcd343c3c0faaa08dc58704ebfee7b0ec Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:23:01 -0600
Subject: [PATCH 14/27] More fixes

---
 ext/CUDA/operators.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index fda7042..59bac7b 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -52,7 +52,8 @@ function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVect
     (A.nrhs != 1) && throw(DimensionMismatch(""))
     descY = CUSPARSE.CuDenseVectorDescriptor(y)
     descX = CUSPARSE.CuDenseVectorDescriptor(x)
-    CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+    algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
+    CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, algo, A.buffer)
 end
 
 function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
@@ -63,5 +64,6 @@ function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatr
     (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
     descY = CUSPARSE.CuDenseMatrixDescriptor(Y)
     descX = CUSPARSE.CuDenseMatrixDescriptor(X)
-    CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, A.algo, A.buffer)
+    algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
+    CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, algo, A.buffer)
 end

From c4ab48b5acb79f742acb4fdffa5ae4a360d9cb17 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 00:27:48 -0600
Subject: [PATCH 15/27] More fixes

---
 ext/CUDA/operators.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 59bac7b..f4b65da 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -53,7 +53,7 @@ function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVect
     descY = CUSPARSE.CuDenseVectorDescriptor(y)
     descX = CUSPARSE.CuDenseVectorDescriptor(x)
     algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
-    CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, one(T), A.descA, descX, zero(T), descY, T, algo, A.buffer)
+    CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer)
 end
 
 function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
@@ -65,5 +65,5 @@ function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatr
     descY = CUSPARSE.CuDenseMatrixDescriptor(Y)
     descX = CUSPARSE.CuDenseMatrixDescriptor(X)
     algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT
-    CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', one(T), A.descA, descX, zero(T), descY, T, algo, A.buffer)
+    CUSPARSE.cusparseSpMM(CUSPARSE.handle(), A.transa, 'N', Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer)
 end

From a79a64839989178298051f157fb3a06d9e420994 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 01:14:30 -0600
Subject: [PATCH 16/27] [AMDGPU.jl] Add a KrylovOperator for sparse matrices

---
 ext/AMDGPU/operators.jl               | 74 +++++++++++++++++++++++++++
 ext/CUDA/operators.jl                 | 14 ++---
 ext/KrylovPreconditionersAMDGPUExt.jl |  5 +-
 test/gpu/amd.jl                       | 12 +++++
 4 files changed, 96 insertions(+), 9 deletions(-)
 create mode 100644 ext/AMDGPU/operators.jl

diff --git a/ext/AMDGPU/operators.jl b/ext/AMDGPU/operators.jl
new file mode 100644
index 0000000..d6fdf12
--- /dev/null
+++ b/ext/AMDGPU/operators.jl
@@ -0,0 +1,74 @@
+mutable struct AMD_KrylovOperator{T}
+    type::Type{T}
+    m::Int
+    n::Int
+    nrhs::Int
+    transa::Char
+    descA::rocSPARSE.ROCSparseMatrixDescriptor
+    buffer_size::Ref{Csize_t}
+    buffer::ROCVector{UInt8}
+end
+
+eltype(A::AMD_KrylovOperator{T}) where T = T
+size(A::AMD_KrylovOperator) = (A.m, A.n)
+
+for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
+                                     (:(ROCSparseMatrixCSC{T}), :BlasFloat),
+                                     (:(ROCSparseMatrixCOO{T}), :BlasFloat))
+    @eval begin
+        function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
+            m,n = size(A)
+            if nrhs == 1
+                alpha = Ref{T}(one(T))
+                beta = Ref{T}(zero(T))
+                descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
+                descX = rocSPARSE.ROCDenseVectorDescriptor(T, n)
+                descY = rocSPARSE.ROCDenseVectorDescriptor(T, m)
+                algo = rocSPARSE.rocSPARSE.rocsparse_spmv_alg_default
+                buffer_size = Ref{Csize_t}()
+                rocSPARSE.rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size, C_NULL)
+                buffer = ROCVector{UInt8}(undef, buffer_size[])
+                return AMD_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
+            else
+                alpha = Ref{T}(one(T))
+                beta = Ref{T}(zeto(T))
+                descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
+                descX = rocSPARSE.ROCDenseMatrixDescriptor(T, n, nrhs)
+                descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs)
+                algo = rocSPARSE.rocsparse_spmm_alg_default
+                buffer_size = Ref{Csize_t}()
+                transb = 'N'
+                rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, beta, descY, T,
+                                         algo, rocSPARSE.rocsparse_spmm_stage_buffer_size, buffer_size, C_NULL)
+                buffer = ROCVector{UInt8}(undef, buffer_size[])
+                rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, beta, descY, T,
+                                         algo, rocSPARSE.rocsparse_spmm_stage_preprocess, buffer_size, buffer)
+                return AMD_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
+            end
+        end
+    end
+end
+
+function LinearAlgebra.mul!(y::ROCVector{T}, A::AMD_KrylovOperator{T}, x::ROCVector{T}) where T <: BlasFloat
+    (length(y) != A.m) && throw(DimensionMismatch(""))
+    (length(x) != A.n) && throw(DimensionMismatch(""))
+    (A.nrhs != 1) && throw(DimensionMismatch(""))
+    descY = rocSPARSE.ROCDenseVectorDescriptor(y)
+    descX = rocSPARSE.ROCDenseVectorDescriptor(x)
+    algo = rocSPARSE.rocsparse_spmv_alg_default
+    rocSPARSE.rocsparse_spmv(rocSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX,
+                             Ref{T}(zero(T)), descY, T, algo, A.buffer_size, A.buffer)
+end
+
+function LinearAlgebra.mul!(Y::ROCMatrix{T}, A::AMD_KrylovOperator{T}, X::ROCMatrix{T}) where T <: BlasFloat
+    mY, nY = size(Y)
+    mX, nX = size(X)
+    (mY != A.m) && throw(DimensionMismatch(""))
+    (mX != A.n) && throw(DimensionMismatch(""))
+    (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
+    descY = rocSPARSE.ROCDenseMatrixDescriptor(Y)
+    descX = rocSPARSE.ROCDenseMatrixDescriptor(X)
+    algo = rocSPARSE.rocsparse_spmm_alg_default
+    rocSPARSE.rocsparse_spmm(rocSPARSE.handle(), A.transa, 'N', Ref{T}(one(T)), A.descA, descX,
+                             Ref{T}(zero(T)), descY, T, algo, rocSPARSE.rocsparse_spmm_stage_compute, A.buffer_size, A.buffer)
+end
diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index f4b65da..81d3e35 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -1,4 +1,4 @@
-mutable struct CUDA_KrylovOperator{T}
+mutable struct NVIDIA_KrylovOperator{T}
     type::Type{T}
     m::Int
     n::Int
@@ -8,8 +8,8 @@ mutable struct CUDA_KrylovOperator{T}
     buffer::CuVector{UInt8}
 end
 
-eltype(A::CUDA_KrylovOperator{T}) where T = T
-size(A::CUDA_KrylovOperator) = (A.m, A.n)
+eltype(A::NVIDIA_KrylovOperator{T}) where T = T
+size(A::NVIDIA_KrylovOperator) = (A.m, A.n)
 
 for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                                      (:(CuSparseMatrixCSC{T}), :BlasFloat),
@@ -27,7 +27,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 buffer_size = Ref{Csize_t}()
                 CUSPARSE.cusparseSpMV_bufferSize(CUSPARSE.handle(), transa, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size[])
-                return CUDA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
+                return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
             else
                 alpha = Ref{T}(one(T))
                 beta = Ref{T}(zeto(T))
@@ -40,13 +40,13 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size[])
                 CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
-                return CUDA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
+                return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
             end
         end
     end
 end
 
-function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVector{T}) where T <: BlasFloat
+function LinearAlgebra.mul!(y::CuVector{T}, A::NVIDIA_KrylovOperator{T}, x::CuVector{T}) where T <: BlasFloat
     (length(y) != A.m) && throw(DimensionMismatch(""))
     (length(x) != A.n) && throw(DimensionMismatch(""))
     (A.nrhs != 1) && throw(DimensionMismatch(""))
@@ -56,7 +56,7 @@ function LinearAlgebra.mul!(y::CuVector{T}, A::CUDA_KrylovOperator{T}, x::CuVect
     CUSPARSE.cusparseSpMV(CUSPARSE.handle(), A.transa, Ref{T}(one(T)), A.descA, descX, Ref{T}(zero(T)), descY, T, algo, A.buffer)
 end
 
-function LinearAlgebra.mul!(Y::CuMatrix{T}, A::CUDA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
+function LinearAlgebra.mul!(Y::CuMatrix{T}, A::NVIDIA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
     mY, nY = size(Y)
     mX, nX = size(X)
     (mY != A.m) && throw(DimensionMismatch(""))
diff --git a/ext/KrylovPreconditionersAMDGPUExt.jl b/ext/KrylovPreconditionersAMDGPUExt.jl
index b75c28c..54e8e6e 100644
--- a/ext/KrylovPreconditionersAMDGPUExt.jl
+++ b/ext/KrylovPreconditionersAMDGPUExt.jl
@@ -3,8 +3,8 @@ using LinearAlgebra
 using AMDGPU
 using AMDGPU.rocSPARSE
 using LinearAlgebra: checksquare, BlasReal, BlasFloat
-import LinearAlgebra: ldiv!
-using SparseArrays
+import LinearAlgebra: ldiv!, mul!
+import Base: size, eltype
 
 using KrylovPreconditioners
 const KP = KrylovPreconditioners
@@ -14,5 +14,6 @@ const KA = KernelAbstractions
 include("AMDGPU/ic0.jl")
 include("AMDGPU/ilu0.jl")
 include("AMDGPU/blockjacobi.jl")
+# include("AMDGPU/operators.jl")
 
 end
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
index 366e2cd..0948908 100644
--- a/test/gpu/amd.jl
+++ b/test/gpu/amd.jl
@@ -26,6 +26,18 @@ include("gpu.jl")
     end
   end
 
+  # @testset "KrylovOperator" begin
+  #   @testset "ROCSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
+  #     test_operator(FC, ROCVector{FC}, ROCSparseMatrixCOO{FC})
+  #   end
+  #   @testset "ROCSparseMatrixCSC -- $FC" for FC in (Float64, ComplexF64)
+  #     test_operator(FC, ROCVector{FC}, ROCSparseMatrixCSC{FC})
+  #   end
+  #   @testset "ROCSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
+  #     test_operator(FC, ROCVector{FC}, ROCSparseMatrixCSR{FC})
+  #   end
+  # end
+
   @testset "Block Jacobi preconditioner" begin
       if AMDGPU.functional()
           test_block_jacobi(ROCBackend(), ROCArray, ROCSparseMatrixCSR)

From c86714986919a66d3901f4377420f90b4ff30c09 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 11:57:37 -0600
Subject: [PATCH 17/27] Add more tests with KrylovOperators

---
 test/gpu/gpu.jl    | 27 ++++++++++++++++++++++++---
 test/gpu/nvidia.jl |  6 +++---
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 65453d5..4f345d4 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -42,14 +42,14 @@ function test_ilu0(FC, V, M)
   @test norm(r_gpu) ≤ 1e-8
 end
 
-function test_operator(FC, V, M)
+function test_operator(FC, V, DM, SM)
+  m = 200
   n = 100
-  R = real(FC)
   A_cpu = rand(FC, n, n)
   A_cpu = sparse(A_cpu)
   b_cpu = rand(FC, n)
 
-  A_gpu = M(A_cpu)
+  A_gpu = SM(A_cpu)
   b_gpu = V(b_cpu)
   opA_gpu = KrylovOperator(A_gpu)
 
@@ -57,6 +57,27 @@ function test_operator(FC, V, M)
   r_gpu = b_gpu - A_gpu * x_gpu
   @test stats.solved
   @test norm(r_gpu) ≤ 1e-8
+
+  A_cpu = rand(FC, m, n)
+  A_cpu = sparse(A_cpu)
+  opA_gpu = KrylovOperator(A_gpu)
+  for i = 1:5
+    y_cpu = rand(FC, m)
+    y_gpu = V(y_cpu)
+    x_cpu = rand(FC, n)
+    x_gpu = V(x_cpu)
+    mul!(y_gpu, opA_gpu, x_gpu)
+  end
+
+  nrhs = 3
+  opA_gpu = KrylovOperator(A_gpu; nrhs)
+  for i = 1:5
+    Y_cpu = rand(FC, m, nrhs)
+    Y_gpu = DM(Y_cpu)
+    X_cpu = rand(FC, n, nrhs)
+    X_gpu = DM(X_cpu)
+    mul!(Y_gpu, opA_gpu, X_gpu)
+  end
 end
 
 _get_type(J::SparseMatrixCSC) = Vector{Float64}
diff --git a/test/gpu/nvidia.jl b/test/gpu/nvidia.jl
index dbf212f..655e1df 100644
--- a/test/gpu/nvidia.jl
+++ b/test/gpu/nvidia.jl
@@ -28,13 +28,13 @@ include("gpu.jl")
 
   @testset "KrylovOperator" begin
     @testset "CuSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
-      test_operator(FC, CuVector{FC}, CuSparseMatrixCOO{FC})
+      test_operator(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCOO{FC})
     end
     @testset "CuSparseMatrixCSC -- $FC" for FC in (Float64, ComplexF64)
-      test_operator(FC, CuVector{FC}, CuSparseMatrixCSC{FC})
+      test_operator(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCSC{FC})
     end
     @testset "CuSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
-      test_operator(FC, CuVector{FC}, CuSparseMatrixCSR{FC})
+      test_operator(FC, CuVector{FC}, CuMatrix{FC}, CuSparseMatrixCSR{FC})
     end
   end
 

From 68403ce1a959126e50562554ab3275d067d4bd3c Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 13:16:06 -0600
Subject: [PATCH 18/27] debug

---
 ext/CUDA/operators.jl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 81d3e35..13ac454 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -47,9 +47,9 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
 end
 
 function LinearAlgebra.mul!(y::CuVector{T}, A::NVIDIA_KrylovOperator{T}, x::CuVector{T}) where T <: BlasFloat
-    (length(y) != A.m) && throw(DimensionMismatch(""))
-    (length(x) != A.n) && throw(DimensionMismatch(""))
-    (A.nrhs != 1) && throw(DimensionMismatch(""))
+    (length(y) != A.m) && throw(DimensionMismatch("length(y) != A.m"))
+    (length(x) != A.n) && throw(DimensionMismatch("length(x) != A.n"))
+    (A.nrhs == 1) || throw(DimensionMismatch("A.nrhs != 1"))
     descY = CUSPARSE.CuDenseVectorDescriptor(y)
     descX = CUSPARSE.CuDenseVectorDescriptor(x)
     algo = CUSPARSE.CUSPARSE_SPMV_ALG_DEFAULT
@@ -59,9 +59,9 @@ end
 function LinearAlgebra.mul!(Y::CuMatrix{T}, A::NVIDIA_KrylovOperator{T}, X::CuMatrix{T}) where T <: BlasFloat
     mY, nY = size(Y)
     mX, nX = size(X)
-    (mY != A.m) && throw(DimensionMismatch(""))
-    (mX != A.n) && throw(DimensionMismatch(""))
-    (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
+    (mY != A.m) && throw(DimensionMismatch("mY != A.m"))
+    (mX != A.n) && throw(DimensionMismatch("mX != A.n"))
+    (nY == nX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs"))
     descY = CUSPARSE.CuDenseMatrixDescriptor(Y)
     descX = CUSPARSE.CuDenseMatrixDescriptor(X)
     algo = CUSPARSE.CUSPARSE_SPMM_ALG_DEFAULT

From 0bdfa60eeaeede1edff92383dab40c268297f588 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 13:32:11 -0600
Subject: [PATCH 19/27] debug

---
 ext/KrylovPreconditionersAMDGPUExt.jl |  2 +-
 test/gpu/amd.jl                       | 22 +++++++++++-----------
 test/gpu/gpu.jl                       |  8 ++++++--
 3 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/ext/KrylovPreconditionersAMDGPUExt.jl b/ext/KrylovPreconditionersAMDGPUExt.jl
index 54e8e6e..0355a2a 100644
--- a/ext/KrylovPreconditionersAMDGPUExt.jl
+++ b/ext/KrylovPreconditionersAMDGPUExt.jl
@@ -14,6 +14,6 @@ const KA = KernelAbstractions
 include("AMDGPU/ic0.jl")
 include("AMDGPU/ilu0.jl")
 include("AMDGPU/blockjacobi.jl")
-# include("AMDGPU/operators.jl")
+include("AMDGPU/operators.jl")
 
 end
diff --git a/test/gpu/amd.jl b/test/gpu/amd.jl
index 0948908..fbd7a07 100644
--- a/test/gpu/amd.jl
+++ b/test/gpu/amd.jl
@@ -26,17 +26,17 @@ include("gpu.jl")
     end
   end
 
-  # @testset "KrylovOperator" begin
-  #   @testset "ROCSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
-  #     test_operator(FC, ROCVector{FC}, ROCSparseMatrixCOO{FC})
-  #   end
-  #   @testset "ROCSparseMatrixCSC -- $FC" for FC in (Float64, ComplexF64)
-  #     test_operator(FC, ROCVector{FC}, ROCSparseMatrixCSC{FC})
-  #   end
-  #   @testset "ROCSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
-  #     test_operator(FC, ROCVector{FC}, ROCSparseMatrixCSR{FC})
-  #   end
-  # end
+  @testset "KrylovOperator" begin
+    @testset "ROCSparseMatrixCOO -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCOO{FC})
+    end
+    @testset "ROCSparseMatrixCSC -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCSC{FC})
+    end
+    @testset "ROCSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, ROCVector{FC}, ROCMatrix{FC}, ROCSparseMatrixCSR{FC})
+    end
+  end
 
   @testset "Block Jacobi preconditioner" begin
       if AMDGPU.functional()
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 4f345d4..3abdc2c 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -63,20 +63,24 @@ function test_operator(FC, V, DM, SM)
   opA_gpu = KrylovOperator(A_gpu)
   for i = 1:5
     y_cpu = rand(FC, m)
-    y_gpu = V(y_cpu)
     x_cpu = rand(FC, n)
+    mul!(y_cpu, A_cpu, x_cpu)
+    y_gpu = V(y_cpu)
     x_gpu = V(x_cpu)
     mul!(y_gpu, opA_gpu, x_gpu)
+    @test collect(y_gpu) ≈ y_cpu
   end
 
   nrhs = 3
   opA_gpu = KrylovOperator(A_gpu; nrhs)
   for i = 1:5
     Y_cpu = rand(FC, m, nrhs)
-    Y_gpu = DM(Y_cpu)
     X_cpu = rand(FC, n, nrhs)
+    mul!(Y_cpu, A_cpu, X_cpu)
+    Y_gpu = DM(Y_cpu)
     X_gpu = DM(X_cpu)
     mul!(Y_gpu, opA_gpu, X_gpu)
+    @test collect(Y_gpu) ≈ Y_cpu
   end
 end
 

From 5b91db338ea606fe88da9ef34976711b1f1b3bbb Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 13:46:46 -0600
Subject: [PATCH 20/27] debug

---
 .buildkite/pipeline.yml |  3 ++-
 ext/AMDGPU/operators.jl | 12 ++++++------
 test/gpu/gpu.jl         |  2 ++
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 56d7207..2942169 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -33,7 +33,8 @@ steps:
       julia --color=yes --project=test/gpu -e '
         using Pkg
         Pkg.develop(path=".")
-        Pkg.add("AMDGPU")
+        # Pkg.add("AMDGPU")
+        Pkg.add(url="https://github.com/JuliaGPU/AMDGPU.jl", rev="master")
         Pkg.add("Krylov")
         Pkg.instantiate()
         include("test/gpu/amd.jl")'
diff --git a/ext/AMDGPU/operators.jl b/ext/AMDGPU/operators.jl
index d6fdf12..407c284 100644
--- a/ext/AMDGPU/operators.jl
+++ b/ext/AMDGPU/operators.jl
@@ -50,9 +50,9 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
 end
 
 function LinearAlgebra.mul!(y::ROCVector{T}, A::AMD_KrylovOperator{T}, x::ROCVector{T}) where T <: BlasFloat
-    (length(y) != A.m) && throw(DimensionMismatch(""))
-    (length(x) != A.n) && throw(DimensionMismatch(""))
-    (A.nrhs != 1) && throw(DimensionMismatch(""))
+    (length(y) != A.m) && throw(DimensionMismatch("length(y) != A.m"))
+    (length(x) != A.n) && throw(DimensionMismatch("length(x) != A.n"))
+    (A.nrhs == 1) || throw(DimensionMismatch("A.nrhs != 1"))
     descY = rocSPARSE.ROCDenseVectorDescriptor(y)
     descX = rocSPARSE.ROCDenseVectorDescriptor(x)
     algo = rocSPARSE.rocsparse_spmv_alg_default
@@ -63,9 +63,9 @@ end
 function LinearAlgebra.mul!(Y::ROCMatrix{T}, A::AMD_KrylovOperator{T}, X::ROCMatrix{T}) where T <: BlasFloat
     mY, nY = size(Y)
     mX, nX = size(X)
-    (mY != A.m) && throw(DimensionMismatch(""))
-    (mX != A.n) && throw(DimensionMismatch(""))
-    (nY == nX == A.nrhs) || throw(DimensionMismatch(""))
+    (mY != A.m) && throw(DimensionMismatch("mY != A.m"))
+    (mX != A.n) && throw(DimensionMismatch("mX != A.n"))
+    (nY == nX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs"))
     descY = rocSPARSE.ROCDenseMatrixDescriptor(Y)
     descX = rocSPARSE.ROCDenseMatrixDescriptor(X)
     algo = rocSPARSE.rocsparse_spmm_alg_default
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 3abdc2c..23ea318 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -60,6 +60,8 @@ function test_operator(FC, V, DM, SM)
 
   A_cpu = rand(FC, m, n)
   A_cpu = sparse(A_cpu)
+  A_gpu = SM(A_cpu)
+
   opA_gpu = KrylovOperator(A_gpu)
   for i = 1:5
     y_cpu = rand(FC, m)

From 76bd1bdbf393551f09869ad7bd30ccbbcdfa3f50 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 13:58:50 -0600
Subject: [PATCH 21/27] debug

---
 ext/AMDGPU/operators.jl | 2 +-
 ext/CUDA/operators.jl   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/AMDGPU/operators.jl b/ext/AMDGPU/operators.jl
index 407c284..f77504d 100644
--- a/ext/AMDGPU/operators.jl
+++ b/ext/AMDGPU/operators.jl
@@ -31,7 +31,7 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
                 return AMD_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
             else
                 alpha = Ref{T}(one(T))
-                beta = Ref{T}(zeto(T))
+                beta = Ref{T}(zero(T))
                 descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
                 descX = rocSPARSE.ROCDenseMatrixDescriptor(T, n, nrhs)
                 descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs)
diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 13ac454..4671c21 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -30,7 +30,7 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
             else
                 alpha = Ref{T}(one(T))
-                beta = Ref{T}(zeto(T))
+                beta = Ref{T}(zero(T))
                 descA = CUSPARSE.CuSparseMatrixDescriptor(A, 'O')
                 descX = CUSPARSE.CuDenseMatrixDescriptor(T, n, nrhs)
                 descY = CUSPARSE.CuDenseMatrixDescriptor(T, m, nrhs)

From 7db244f99940f7d1e41518e2efa429a8071545b0 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 14:30:18 -0600
Subject: [PATCH 22/27] debug

---
 ext/CUDA/operators.jl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 4671c21..9733020 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -39,7 +39,9 @@ for (SparseMatrixType, BlasType) in ((:(CuSparseMatrixCSR{T}), :BlasFloat),
                 transb = 'N'
                 CUSPARSE.cusparseSpMM_bufferSize(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer_size)
                 buffer = CuVector{UInt8}(undef, buffer_size[])
-                CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
+                if !(A isa CuSparseMatrixCOO)
+                    CUSPARSE.cusparseSpMM_preprocess(CUSPARSE.handle(), transa, transb, alpha, descA, descX, beta, descY, T, algo, buffer)
+                end
                 return NVIDIA_KrylovOperator{T}(T, m, n, nrhs, transa, descA, buffer)
             end
         end

From ac5623984cf0778c03f1f2a37636caad2a5ab1f0 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 14:39:18 -0600
Subject: [PATCH 23/27] debug

---
 ext/AMDGPU/operators.jl      | 2 +-
 ext/CUDA/operators.jl        | 2 +-
 src/KrylovPreconditioners.jl | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ext/AMDGPU/operators.jl b/ext/AMDGPU/operators.jl
index f77504d..7413d46 100644
--- a/ext/AMDGPU/operators.jl
+++ b/ext/AMDGPU/operators.jl
@@ -1,4 +1,4 @@
-mutable struct AMD_KrylovOperator{T}
+mutable struct AMD_KrylovOperator{T} <: AbstractKrylovOperator{T}
     type::Type{T}
     m::Int
     n::Int
diff --git a/ext/CUDA/operators.jl b/ext/CUDA/operators.jl
index 9733020..fb0ec88 100644
--- a/ext/CUDA/operators.jl
+++ b/ext/CUDA/operators.jl
@@ -1,4 +1,4 @@
-mutable struct NVIDIA_KrylovOperator{T}
+mutable struct NVIDIA_KrylovOperator{T} <: AbstractKrylovOperator{T}
     type::Type{T}
     m::Int
     n::Int
diff --git a/src/KrylovPreconditioners.jl b/src/KrylovPreconditioners.jl
index 1eb3da3..4b51c9d 100644
--- a/src/KrylovPreconditioners.jl
+++ b/src/KrylovPreconditioners.jl
@@ -12,6 +12,9 @@ import LinearAlgebra: ldiv!
 abstract type AbstractKrylovPreconditioner end
 export AbstractKrylovPreconditioner
 
+abstract type AbstractKrylovOperator{T} end
+export AbstractKrylovOperator end
+
 function KrylovOperator end
 export KrylovOperator
 

From fe6570744aab08a641f33b7d9180008ea1ead9b4 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Tue, 28 Nov 2023 14:40:21 -0600
Subject: [PATCH 24/27] debug

---
 src/KrylovPreconditioners.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/KrylovPreconditioners.jl b/src/KrylovPreconditioners.jl
index 4b51c9d..97e0e22 100644
--- a/src/KrylovPreconditioners.jl
+++ b/src/KrylovPreconditioners.jl
@@ -13,7 +13,7 @@ abstract type AbstractKrylovPreconditioner end
 export AbstractKrylovPreconditioner
 
 abstract type AbstractKrylovOperator{T} end
-export AbstractKrylovOperator end
+export AbstractKrylovOperator
 
 function KrylovOperator end
 export KrylovOperator

From 3805f13f5319d89e4a0d4ce01c93fe9c8460a177 Mon Sep 17 00:00:00 2001
From: Michel Schanen <mschanen@anl.gov>
Date: Wed, 29 Nov 2023 15:48:17 -0500
Subject: [PATCH 25/27] Fix broken IC0 test

---
 test/gpu/gpu.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 23ea318..bff3d72 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -18,7 +18,7 @@ function test_ic0(FC, V, M)
   x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
   r_gpu = b_gpu - A_gpu * x_gpu
   @test stats.niter ≤ 5
-  if (FC <: ComplexF64) && typeof(V).name.name == :ROCArray
+  if (FC <: ComplexF64) && V <: ROCArray{ComplexF64, 1}
     @test_broken norm(r_gpu) ≤ 1e-6
   else
     @test norm(r_gpu) ≤ 1e-8

From 082d49b8799ae034d42274603ae925a79b7a902e Mon Sep 17 00:00:00 2001
From: Alexis Montoison <35051714+amontoison@users.noreply.github.com>
Date: Wed, 29 Nov 2023 15:12:15 -0600
Subject: [PATCH 26/27] Update test/gpu/gpu.jl

---
 test/gpu/gpu.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index bff3d72..f42fe4b 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -18,7 +18,7 @@ function test_ic0(FC, V, M)
   x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
   r_gpu = b_gpu - A_gpu * x_gpu
   @test stats.niter ≤ 5
-  if (FC <: ComplexF64) && V <: ROCArray{ComplexF64, 1}
+  if (FC <: ComplexF64) && V.name.name == :ROCArray
     @test_broken norm(r_gpu) ≤ 1e-6
   else
     @test norm(r_gpu) ≤ 1e-8

From 7377271d48c5f69a8d847f3e10da5074506adeeb Mon Sep 17 00:00:00 2001
From: Alexis Montoison <alexis.montoison@polymtl.ca>
Date: Wed, 29 Nov 2023 23:12:56 -0600
Subject: [PATCH 27/27] Update gpu.jl

---
 test/gpu/gpu.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index f42fe4b..c852ad7 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -18,7 +18,7 @@ function test_ic0(FC, V, M)
   x_gpu, stats = cg(A_gpu, b_gpu, M=P, ldiv=true)
   r_gpu = b_gpu - A_gpu * x_gpu
   @test stats.niter ≤ 5
-  if (FC <: ComplexF64) && V.name.name == :ROCArray
+  if (FC <: ComplexF64) && V.body.name.name == :ROCArray
     @test_broken norm(r_gpu) ≤ 1e-6
   else
     @test norm(r_gpu) ≤ 1e-8