diff --git a/ext/AMDGPU/operators.jl b/ext/AMDGPU/operators.jl
index a88bb6a..a2c2632 100644
--- a/ext/AMDGPU/operators.jl
+++ b/ext/AMDGPU/operators.jl
@@ -102,8 +102,8 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
     m,n = size(A)
     alpha = Ref{T}(one(T))
     descA = rocSPARSE.ROCSparseMatrixDescriptor(A, 'O')
-    rocsparse_uplo = Ref{rocSPARSE.rocsparse_diag_type}(uplo)
-    rocsparse_diag = Ref{rocSPARSE.rocsparse_matrix_type}(diag)
+    rocsparse_uplo = Ref{rocSPARSE.rocsparse_fill_mode}(uplo)
+    rocsparse_diag = Ref{rocSPARSE.rocsparse_diag_type}(diag)
     rocSPARSE.rocsparse_spmat_set_attribute(descA, rocSPARSE.rocsparse_spmat_fill_mode, rocsparse_uplo, Csize_t(sizeof(rocsparse_uplo)))
     rocSPARSE.rocsparse_spmat_set_attribute(descA, rocSPARSE.rocsparse_spmat_diag_type, rocsparse_diag, Csize_t(sizeof(rocsparse_diag)))
     if nrhs == 1
@@ -122,10 +122,10 @@ for (SparseMatrixType, BlasType) in ((:(ROCSparseMatrixCSR{T}), :BlasFloat),
       descY = rocSPARSE.ROCDenseMatrixDescriptor(T, m, nrhs)
       algo = rocSPARSE.rocsparse_spsm_alg_default
       buffer_size = Ref{Csize_t}()
-      rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo,
+      rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo,
                                rocSPARSE.rocsparse_spsm_stage_buffer_size, buffer_size, C_NULL)
       buffer = ROCVector{UInt8}(undef, buffer_size[])
-      rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, alpha, descA, descX, descY, T, algo,
+      rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), transa, 'N', alpha, descA, descX, descY, T, algo,
                                rocSPARSE.rocsparse_spsm_stage_preprocess, buffer_size, buffer)
       return AMD_TriangularOperator{T}(T, m, n, nrhs, transa, descA, buffer_size, buffer)
     end
@@ -161,6 +161,6 @@ function LinearAlgebra.ldiv!(Y::ROCMatrix{T}, A::AMD_TriangularOperator{T}, X::R
   descX = rocSPARSE.ROCDenseMatrixDescriptor(X)
   algo = rocSPARSE.rocsparse_spsm_alg_default
   alpha = Ref{T}(one(T))
-  rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), A.transa, alpha, A.descA, descX, descY, T,
+  rocSPARSE.rocsparse_spsm(rocSPARSE.handle(), A.transa, 'N', alpha, A.descA, descX, descY, T,
                            algo, rocSPARSE.rocsparse_spsm_stage_compute, A.buffer_size, A.buffer)
 end
diff --git a/test/gpu/gpu.jl b/test/gpu/gpu.jl
index 20ddcb8..498f913 100644
--- a/test/gpu/gpu.jl
+++ b/test/gpu/gpu.jl
@@ -152,19 +152,17 @@ function test_triangular(FC, V, DM, SM)
       ldiv!(y_gpu, opA_gpu, x_gpu)
       @test collect(y_gpu) ≈ y_cpu
     end
-    if diag == 'N'
-      for j = 1:5
-        y_cpu = rand(FC, n)
-        x_cpu = rand(FC, n)
-        A_cpu2 = A_cpu + j*I
-        ldiv!(y_cpu, triangle(A_cpu2), x_cpu)
-        y_gpu = V(y_cpu)
-        x_gpu = V(x_cpu)
-        A_gpu2 = SM(A_cpu2)
-        update!(opA_gpu, A_gpu2)
-        ldiv!(y_gpu, opA_gpu, x_gpu)
-        @test collect(y_gpu) ≈ y_cpu
-      end
+    for j = 1:5
+      y_cpu = rand(FC, n)
+      x_cpu = rand(FC, n)
+      A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
+      ldiv!(y_cpu, triangle(A_cpu2), x_cpu)
+      y_gpu = V(y_cpu)
+      x_gpu = V(x_cpu)
+      A_gpu2 = SM(A_cpu2)
+      update!(opA_gpu, A_gpu2)
+      ldiv!(y_gpu, opA_gpu, x_gpu)
+      @test collect(y_gpu) ≈ y_cpu
     end
 
     nrhs = 3
@@ -178,11 +176,11 @@ function test_triangular(FC, V, DM, SM)
       ldiv!(Y_gpu, opA_gpu, X_gpu)
       @test collect(Y_gpu) ≈ Y_cpu
     end
-    if diag == 'N' && V.body.name.name != :CuArray
+    if V.body.name.name != :CuArray
       for j = 1:5
         Y_cpu = rand(FC, n, nrhs)
         X_cpu = rand(FC, n, nrhs)
-        A_cpu2 = A_cpu + j*I
+        A_cpu2 = A_cpu + j*tril(A_cpu,-1) + j*triu(A_cpu,1)
         ldiv!(Y_cpu, triangle(A_cpu2), X_cpu)
         Y_gpu = DM(Y_cpu)
         X_gpu = DM(X_cpu)
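
Note, not part of the patch: a minimal, self-contained sketch of the property the new test perturbation relies on. The matrix A_cpu and the scalar j below are illustrative stand-ins, not values taken from test/gpu/gpu.jl. Adding j*tril(A_cpu,-1) + j*triu(A_cpu,1) rescales only the strictly triangular entries, so both the diagonal values and the sparsity pattern of A_cpu stay unchanged, whereas the old A_cpu + j*I modified the diagonal; presumably this is what lets the update! tests also cover unit-diagonal ('U') operators and drop the diag == 'N' guard.

using LinearAlgebra, SparseArrays

n = 5
A_cpu = sprand(Float64, n, n, 0.4) + 2I   # illustrative sparse matrix with a nonzero diagonal
j = 3

A_old = A_cpu + j*I                                   # old perturbation: touches the diagonal
A_new = A_cpu + j*tril(A_cpu, -1) + j*triu(A_cpu, 1)  # new perturbation: strictly triangular parts only

@assert diag(A_new) == diag(A_cpu)        # diagonal values unchanged
@assert (A_new .!= 0) == (A_cpu .!= 0)    # sparsity pattern unchanged
@assert diag(A_old) != diag(A_cpu)        # A + j*I, by contrast, changes the diagonal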