diff --git a/ext/KrylovPreconditionersoneAPIExt.jl b/ext/KrylovPreconditionersoneAPIExt.jl
index 4435ba0..36ff42a 100644
--- a/ext/KrylovPreconditionersoneAPIExt.jl
+++ b/ext/KrylovPreconditionersoneAPIExt.jl
@@ -2,6 +2,7 @@ module KrylovPreconditionersoneAPIExt
 using LinearAlgebra
 using SparseArrays
 using oneAPI
+using oneAPI: global_queue, sycl_queue, context, device
 using oneAPI.oneMKL
 using LinearAlgebra: checksquare, BlasReal, BlasFloat
 import LinearAlgebra: ldiv!, mul!
@@ -13,5 +14,6 @@ using KernelAbstractions
 const KA = KernelAbstractions
 
 include("oneAPI/blockjacobi.jl")
+include("oneAPI/operators.jl")
 
 end
diff --git a/ext/oneAPI/operators.jl b/ext/oneAPI/operators.jl
new file mode 100644
index 0000000..b4d8603
--- /dev/null
+++ b/ext/oneAPI/operators.jl
@@ -0,0 +1,95 @@
+mutable struct INTEL_KrylovOperator{T} <: AbstractKrylovOperator{T}
+    type::Type{T}
+    m::Int
+    n::Int
+    nrhs::Int
+    transa::Char
+    matrix::oneSparseMatrixCSR{T}
+end
+
+eltype(A::INTEL_KrylovOperator{T}) where T = T
+size(A::INTEL_KrylovOperator) = (A.m, A.n)
+
+for (SparseMatrixType, BlasType) in ((:(oneSparseMatrixCSR{T}), :BlasFloat),)
+    @eval begin
+        function KP.KrylovOperator(A::$SparseMatrixType; nrhs::Int=1, transa::Char='N') where T <: $BlasType
+            m,n = size(A)
+            if nrhs == 1
+                oneMKL.sparse_optimize_gemv!(transa, A)
+            end
+            # sparse_optimize_gemm! is only available with oneAPI 2024.1.0
+            return INTEL_KrylovOperator{T}(T, m, n, nrhs, transa, A)
+        end
+
+        function KP.update!(A::INTEL_KrylovOperator{T}, B::$SparseMatrixType) where T <: $BlasFloat
+            error("The update of an INTEL_KrylovOperator is not supported.")
+        end
+    end
+end
+
+function LinearAlgebra.mul!(y::oneVector{T}, A::INTEL_KrylovOperator{T}, x::oneVector{T}) where T <: BlasFloat
+    (length(y) != A.m) && throw(DimensionMismatch("length(y) != A.m"))
+    (length(x) != A.n) && throw(DimensionMismatch("length(x) != A.n"))
+    (A.nrhs == 1) || throw(DimensionMismatch("A.nrhs != 1"))
+    alpha = one(T)
+    beta = zero(T)
+    oneMKL.sparse_gemv!(A.transa, alpha, A.matrix, x, beta, y)
+end
+
+function LinearAlgebra.mul!(Y::oneMatrix{T}, A::INTEL_KrylovOperator{T}, X::oneMatrix{T}) where T <: BlasFloat
+    mY, nY = size(Y)
+    mX, nX = size(X)
+    (mY != A.m) && throw(DimensionMismatch("mY != A.m"))
+    (mX != A.n) && throw(DimensionMismatch("mX != A.n"))
+    (nY == nX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs"))
+    alpha = one(T)
+    beta = zero(T)
+    oneMKL.sparse_gemm!(A.transa, 'N', alpha, A.matrix, X, beta, Y)
+end
+
+mutable struct INTEL_TriangularOperator{T} <: AbstractTriangularOperator{T}
+    type::Type{T}
+    m::Int
+    n::Int
+    nrhs::Int
+    uplo::Char
+    diag::Char
+    transa::Char
+    matrix::oneSparseMatrixCSR{T}
+end
+
+eltype(A::INTEL_TriangularOperator{T}) where T = T
+size(A::INTEL_TriangularOperator) = (A.m, A.n)
+
+for (SparseMatrixType, BlasType) in ((:(oneSparseMatrixCSR{T}), :BlasFloat),)
+    @eval begin
+        function KP.TriangularOperator(A::$SparseMatrixType, uplo::Char, diag::Char; nrhs::Int=1, transa::Char='N') where T <: $BlasType
+            m,n = size(A)
+            if nrhs == 1
+                oneMKL.sparse_optimize_trsv!(uplo, transa, diag, A)
+            end
+            # sparse_optimize_trsm! is only available with oneAPI 2024.1.0
+            return INTEL_TriangularOperator{T}(T, m, n, nrhs, uplo, diag, transa, A)
+        end
+
+        function KP.update!(A::INTEL_TriangularOperator{T}, B::$SparseMatrixType) where T <: $BlasFloat
+            return error("The update of an INTEL_TriangularOperator is not supported.")
+        end
+    end
+end
+
+function LinearAlgebra.ldiv!(y::oneVector{T}, A::INTEL_TriangularOperator{T}, x::oneVector{T}) where T <: BlasFloat
+    (length(y) != A.m) && throw(DimensionMismatch("length(y) != A.m"))
+    (length(x) != A.n) && throw(DimensionMismatch("length(x) != A.n"))
+    (A.nrhs == 1) || throw(DimensionMismatch("A.nrhs != 1"))
+    oneMKL.sparse_trsv!(A.uplo, A.transa, A.diag, A.matrix, x, y)
+end
+
+function LinearAlgebra.ldiv!(Y::oneMatrix{T}, A::INTEL_TriangularOperator{T}, X::oneMatrix{T}) where T <: BlasFloat
+    mY, nY = size(Y)
+    mX, nX = size(X)
+    (mY != A.m) && throw(DimensionMismatch("mY != A.m"))
+    (mX != A.n) && throw(DimensionMismatch("mX != A.n"))
+    (nY == nX == A.nrhs) || throw(DimensionMismatch("nY != A.nrhs or nX != A.nrhs"))
+    error("The routine sparse_trsm! is only available with oneAPI 2024.1.0")
+end
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 498f913..996df90 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -90,41 +90,45 @@ function test_operator(FC, V, DM, SM)
     mul!(y_gpu, opA_gpu, x_gpu)
     @test collect(y_gpu) ≈ y_cpu
   end
-  for j = 1:5
-    y_cpu = rand(FC, m)
-    x_cpu = rand(FC, n)
-    A_cpu2 = A_cpu + j*I
-    mul!(y_cpu, A_cpu2, x_cpu)
-    y_gpu = V(y_cpu)
-    x_gpu = V(x_cpu)
-    A_gpu2 = SM(A_cpu2)
-    update!(opA_gpu, A_gpu2)
-    mul!(y_gpu, opA_gpu, x_gpu)
-    @test collect(y_gpu) ≈ y_cpu
+  if V.body.name.name != :oneArray
+    for j = 1:5
+      y_cpu = rand(FC, m)
+      x_cpu = rand(FC, n)
+      A_cpu2 = A_cpu + j*I
+      mul!(y_cpu, A_cpu2, x_cpu)
+      y_gpu = V(y_cpu)
+      x_gpu = V(x_cpu)
+      A_gpu2 = SM(A_cpu2)
+      update!(opA_gpu, A_gpu2)
+      mul!(y_gpu, opA_gpu, x_gpu)
+      @test collect(y_gpu) ≈ y_cpu
+    end
   end
 
-  nrhs = 3
-  opA_gpu = KrylovOperator(A_gpu; nrhs)
-  for i = 1:5
-    Y_cpu = rand(FC, m, nrhs)
-    X_cpu = rand(FC, n, nrhs)
-    mul!(Y_cpu, A_cpu, X_cpu)
-    Y_gpu = DM(Y_cpu)
-    X_gpu = DM(X_cpu)
-    mul!(Y_gpu, opA_gpu, X_gpu)
-    @test collect(Y_gpu) ≈ Y_cpu
-  end
-  for j = 1:5
-    Y_cpu = rand(FC, m, nrhs)
-    X_cpu = rand(FC, n, nrhs)
-    A_cpu2 = A_cpu + j*I
-    mul!(Y_cpu, A_cpu2, X_cpu)
-    Y_gpu = DM(Y_cpu)
-    X_gpu = DM(X_cpu)
-    A_gpu2 = SM(A_cpu2)
-    update!(opA_gpu, A_gpu2)
-    mul!(Y_gpu, opA_gpu, X_gpu)
-    @test collect(Y_gpu) ≈ Y_cpu
+  if V.body.name.name != :oneArray
+    nrhs = 3
+    opA_gpu = KrylovOperator(A_gpu; nrhs)
+    for i = 1:5
+      Y_cpu = rand(FC, m, nrhs)
+      X_cpu = rand(FC, n, nrhs)
+      mul!(Y_cpu, A_cpu, X_cpu)
+      Y_gpu = DM(Y_cpu)
+      X_gpu = DM(X_cpu)
+      mul!(Y_gpu, opA_gpu, X_gpu)
+      @test collect(Y_gpu) ≈ Y_cpu
+    end
+    for j = 1:5
+      Y_cpu = rand(FC, m, nrhs)
+      X_cpu = rand(FC, n, nrhs)
+      A_cpu2 = A_cpu + j*I
+      mul!(Y_cpu, A_cpu2, X_cpu)
+      Y_gpu = DM(Y_cpu)
+      X_gpu = DM(X_cpu)
+      A_gpu2 = SM(A_cpu2)
+      update!(opA_gpu, A_gpu2)
+      mul!(Y_gpu, opA_gpu, X_gpu)
+      @test collect(Y_gpu) ≈ Y_cpu
+    end
   end
 end
 
@@ -152,43 +156,47 @@ function test_triangular(FC, V, DM, SM)
       ldiv!(y_gpu, opA_gpu, x_gpu)
       @test collect(y_gpu) ≈ y_cpu
     end
-    for j = 1:5
-      y_cpu = rand(FC, n)
-      x_cpu = rand(FC, n)
-      A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
-      ldiv!(y_cpu, triangle(A_cpu2), x_cpu)
-      y_gpu = V(y_cpu)
-      x_gpu = V(x_cpu)
-      A_gpu2 = SM(A_cpu2)
-      update!(opA_gpu, A_gpu2)
-      ldiv!(y_gpu, opA_gpu, x_gpu)
-      @test collect(y_gpu) ≈ y_cpu
+    if V.body.name.name != :oneArray
+      for j = 1:5
+        y_cpu = rand(FC, n)
+        x_cpu = rand(FC, n)
+        A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
+        ldiv!(y_cpu, triangle(A_cpu2), x_cpu)
+        y_gpu = V(y_cpu)
+        x_gpu = V(x_cpu)
+        A_gpu2 = SM(A_cpu2)
+        update!(opA_gpu, A_gpu2)
+        ldiv!(y_gpu, opA_gpu, x_gpu)
+        @test collect(y_gpu) ≈ y_cpu
+      end
     end
 
-    nrhs = 3
-    opA_gpu = TriangularOperator(A_gpu, uplo, diag; nrhs)
-    for i = 1:5
-      Y_cpu = rand(FC, n, nrhs)
-      X_cpu = rand(FC, n, nrhs)
-      ldiv!(Y_cpu, triangle(A_cpu), X_cpu)
-      Y_gpu = DM(Y_cpu)
-      X_gpu = DM(X_cpu)
-      ldiv!(Y_gpu, opA_gpu, X_gpu)
-      @test collect(Y_gpu) ≈ Y_cpu
-    end
-    if V.body.name.name != :CuArray
-      for j = 1:5
+    if V.body.name.name != :oneArray
+      nrhs = 3
+      opA_gpu = TriangularOperator(A_gpu, uplo, diag; nrhs)
+      for i = 1:5
         Y_cpu = rand(FC, n, nrhs)
         X_cpu = rand(FC, n, nrhs)
-        A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
-        ldiv!(Y_cpu, triangle(A_cpu2), X_cpu)
+        ldiv!(Y_cpu, triangle(A_cpu), X_cpu)
         Y_gpu = DM(Y_cpu)
         X_gpu = DM(X_cpu)
-        A_gpu2 = SM(A_cpu2)
-        update!(opA_gpu, A_gpu2)
         ldiv!(Y_gpu, opA_gpu, X_gpu)
         @test collect(Y_gpu) ≈ Y_cpu
       end
+      if V.body.name.name != :CuArray
+        for j = 1:5
+          Y_cpu = rand(FC, n, nrhs)
+          X_cpu = rand(FC, n, nrhs)
+          A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
+          ldiv!(Y_cpu, triangle(A_cpu2), X_cpu)
+          Y_gpu = DM(Y_cpu)
+          X_gpu = DM(X_cpu)
+          A_gpu2 = SM(A_cpu2)
+          update!(opA_gpu, A_gpu2)
+          ldiv!(Y_gpu, opA_gpu, X_gpu)
+          @test collect(Y_gpu) ≈ Y_cpu
+        end
+      end
     end
   end
 end
diff --git a/test/gpu/intel.jl b/test/gpu/intel.jl
index 199bd08..966780c 100644
--- a/test/gpu/intel.jl
+++ b/test/gpu/intel.jl
@@ -9,6 +9,18 @@ include("gpu.jl")
   @test oneAPI.functional()
   oneAPI.allowscalar(false)
 
+  @testset "KrylovOperator" begin
+    @testset "oneSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
+      test_operator(FC, oneVector{FC}, oneMatrix{FC}, oneSparseMatrixCSR)
+    end
+  end
+
+  @testset "TriangularOperator" begin
+    @testset "oneSparseMatrixCSR -- $FC" for FC in (Float64, ComplexF64)
+      test_triangular(FC, oneVector{FC}, oneMatrix{FC}, oneSparseMatrixCSR)
+    end
+  end
+
   @testset "Block Jacobi preconditioner" begin
     test_block_jacobi(oneAPIBackend(), oneArray, oneSparseMatrixCSR)
   end