From 206ef971e4c20cc32516b148126a322dddf9cdf6 Mon Sep 17 00:00:00 2001 From: Alexis Montoison <35051714+amontoison@users.noreply.github.com> Date: Wed, 25 Sep 2024 00:06:29 -0500 Subject: [PATCH] Add the code of IncompleteLU.jl in KrylovPreconditioners.jl (#56) --- .buildkite/pipeline.yml | 10 +- Project.toml | 16 +- src/KrylovPreconditioners.jl | 1 + src/ilu/IncompleteLU.jl | 15 ++ src/ilu/application.jl | 221 +++++++++++++++++++++ src/ilu/crout_ilu.jl | 113 +++++++++++ src/ilu/insertion_sort_update_vector.jl | 92 +++++++++ src/ilu/linked_list.jl | 60 ++++++ src/ilu/sorted_set.jl | 78 ++++++++ src/ilu/sparse_vector_accumulator.jl | 131 +++++++++++++ test/ilu/application.jl | 235 +++++++++++++++++++++++ test/ilu/crout_ilu.jl | 35 ++++ test/ilu/ilu.jl | 6 + test/ilu/insertion_sort_update_vector.jl | 59 ++++++ test/ilu/linked_list.jl | 48 +++++ test/ilu/sorted_set.jl | 41 ++++ test/ilu/sparse_vector_accumulator.jl | 65 +++++++ test/runtests.jl | 5 + 18 files changed, 1218 insertions(+), 13 deletions(-) create mode 100644 src/ilu/IncompleteLU.jl create mode 100644 src/ilu/application.jl create mode 100644 src/ilu/crout_ilu.jl create mode 100644 src/ilu/insertion_sort_update_vector.jl create mode 100644 src/ilu/linked_list.jl create mode 100644 src/ilu/sorted_set.jl create mode 100644 src/ilu/sparse_vector_accumulator.jl create mode 100644 test/ilu/application.jl create mode 100644 test/ilu/crout_ilu.jl create mode 100644 test/ilu/ilu.jl create mode 100644 test/ilu/insertion_sort_update_vector.jl create mode 100644 test/ilu/linked_list.jl create mode 100644 test/ilu/sorted_set.jl create mode 100644 test/ilu/sparse_vector_accumulator.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 878e0a3..2124645 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -2,7 +2,7 @@ steps: - label: "Nvidia GPUs -- CUDA.jl" plugins: - JuliaCI/julia#v1: - version: 1.9 + version: "1.10" agents: queue: "juliagpu" cuda: "*" @@ -20,7 +20,7 @@ steps: - label: "AMD GPUs -- AMDGPU.jl" plugins: - JuliaCI/julia#v1: - version: 1.9 + version: "1.10" agents: queue: "juliagpu" rocm: "*" @@ -44,7 +44,7 @@ steps: - label: "Intel GPUs -- oneAPI.jl" plugins: - JuliaCI/julia#v1: - version: 1.9 + version: "1.10" agents: queue: "juliagpu" intel: "*" @@ -52,8 +52,8 @@ steps: julia --color=yes --project=test/gpu -e ' using Pkg Pkg.develop(path=".") - Pkg.add("oneAPI") - # Pkg.add(url="https://github.com/JuliaGPU/oneAPI.jl", rev="master") + # Pkg.add("oneAPI") + Pkg.add(url="https://github.com/JuliaGPU/oneAPI.jl", rev="master") Pkg.add("Krylov") Pkg.instantiate() include("test/gpu/intel.jl")' diff --git a/Project.toml b/Project.toml index 065c820..add609b 100644 --- a/Project.toml +++ b/Project.toml @@ -22,19 +22,19 @@ KrylovPreconditionersCUDAExt = "CUDA" KrylovPreconditionersoneAPIExt = "oneAPI" [compat] -AMDGPU = "0.9" -Adapt = "3, 4" +AMDGPU = "0.9, 1.0" +Adapt = "4" CUDA = "5.3.0" -oneAPI = "1.5.0" +oneAPI = "1.6.0" KernelAbstractions = "0.9" Krylov = "0.9.4" LightGraphs = "1" -LinearAlgebra = "1.9" +LinearAlgebra = "1.10" Metis = "1" -Random = "1.9" -SparseArrays = "1.9" -Test = "1.9" -julia = "1.9" +Random = "1.10" +SparseArrays = "1.10" +Test = "1.10" +julia = "1.10" [extras] Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7" diff --git a/src/KrylovPreconditioners.jl b/src/KrylovPreconditioners.jl index 7497c80..59ddf5d 100644 --- a/src/KrylovPreconditioners.jl +++ b/src/KrylovPreconditioners.jl @@ -43,6 +43,7 @@ export TriangularOperator include("ic0.jl") include("ilu0.jl") include("blockjacobi.jl") +include("ilu/IncompleteLU.jl") # Scaling include("scaling.jl") diff --git a/src/ilu/IncompleteLU.jl b/src/ilu/IncompleteLU.jl new file mode 100644 index 0000000..dbb22fa --- /dev/null +++ b/src/ilu/IncompleteLU.jl @@ -0,0 +1,15 @@ +using LinearAlgebra: Factorization, AdjointFactorization, LowerTriangular, UnitLowerTriangular, UpperTriangular +using SparseArrays +using Base: @propagate_inbounds + +struct ILUFactorization{Tv,Ti} <: Factorization{Tv} + L::SparseMatrixCSC{Tv,Ti} + U::SparseMatrixCSC{Tv,Ti} +end + +include("sorted_set.jl") +include("linked_list.jl") +include("sparse_vector_accumulator.jl") +include("insertion_sort_update_vector.jl") +include("application.jl") +include("crout_ilu.jl") diff --git a/src/ilu/application.jl b/src/ilu/application.jl new file mode 100644 index 0000000..4fc02d4 --- /dev/null +++ b/src/ilu/application.jl @@ -0,0 +1,221 @@ +import SparseArrays: nnz +import LinearAlgebra: ldiv! +import Base.\ + +export forward_substitution!, backward_substitution! +export adjoint_forward_substitution!, adjoint_backward_substitution! + +""" +Returns the number of nonzeros of the `L` and `U` +factor combined. + +Excludes the unit diagonal of the `L` factor, +which is not stored. +""" +nnz(F::ILUFactorization) = nnz(F.L) + nnz(F.U) + +function ldiv!(F::ILUFactorization, y::AbstractVecOrMat) + forward_substitution!(F, y) + backward_substitution!(F, y) +end + +function ldiv!(F::AdjointFactorization{<:Any,<:ILUFactorization}, y::AbstractVecOrMat) + adjoint_forward_substitution!(F.parent, y) + adjoint_backward_substitution!(F.parent, y) +end + +function ldiv!(y::AbstractVector, F::ILUFactorization, x::AbstractVector) + y .= x + ldiv!(F, y) +end + +function ldiv!(y::AbstractVector, F::AdjointFactorization{<:Any,<:ILUFactorization}, x::AbstractVector) + y .= x + ldiv!(F, y) +end + +function ldiv!(y::AbstractMatrix, F::ILUFactorization, x::AbstractMatrix) + y .= x + ldiv!(F, y) +end + +function ldiv!(y::AbstractMatrix, F::AdjointFactorization{<:Any,<:ILUFactorization}, x::AbstractMatrix) + y .= x + ldiv!(F, y) +end + +(\)(F::ILUFactorization, y::AbstractVecOrMat) = ldiv!(F, copy(y)) +(\)(F::AdjointFactorization{<:Any,<:ILUFactorization}, y::AbstractVecOrMat) = ldiv!(F, copy(y)) + +""" +Applies in-place backward substitution with the U factor of F, under the assumptions: + +1. U is stored transposed / row-wise +2. U has no lower-triangular elements stored +3. U has (nonzero) diagonal elements stored. +""" +function backward_substitution!(F::ILUFactorization, y::AbstractVector) + U = F.U + @inbounds for col = U.n : -1 : 1 + + # Substitutions + for idx = U.colptr[col + 1] - 1 : -1 : U.colptr[col] + 1 + y[col] -= U.nzval[idx] * y[U.rowval[idx]] + end + + # Final value for y[col] + y[col] /= U.nzval[U.colptr[col]] + end + + y +end + +function backward_substitution!(F::ILUFactorization, y::AbstractMatrix) + U = F.U + p = size(y, 2) + + @inbounds for c = 1 : p + @inbounds for col = U.n : -1 : 1 + + # Substitutions + for idx = U.colptr[col + 1] - 1 : -1 : U.colptr[col] + 1 + y[col,c] -= U.nzval[idx] * y[U.rowval[idx],c] + end + + # Final value for y[col,c] + y[col,c] /= U.nzval[U.colptr[col]] + end + end + + y +end + +function backward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector) + v .= y + backward_substitution!(F, v) +end + +function backward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix) + v .= y + backward_substitution!(F, v) +end + +function adjoint_backward_substitution!(F::ILUFactorization, y::AbstractVector) + L = F.L + @inbounds for col = L.n - 1 : -1 : 1 + # Substitutions + for idx = L.colptr[col + 1] - 1 : -1 : L.colptr[col] + y[col] -= L.nzval[idx] * y[L.rowval[idx]] + end + end + + y +end + +function adjoint_backward_substitution!(F::ILUFactorization, y::AbstractMatrix) + L = F.L + p = size(y, 2) + @inbounds for c = 1 : p + @inbounds for col = L.n - 1 : -1 : 1 + # Substitutions + for idx = L.colptr[col + 1] - 1 : -1 : L.colptr[col] + y[col,c] -= L.nzval[idx] * y[L.rowval[idx],c] + end + end + end + + y +end + +function adjoint_backward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector) + v .= y + adjoint_backward_substitution!(F, v) +end + +function adjoint_backward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix) + v .= y + adjoint_backward_substitution!(F, v) +end + +""" +Applies in-place forward substitution with the L factor of F, under the assumptions: + +1. L is stored column-wise (unlike U) +2. L has no upper triangular elements +3. L has *no* diagonal elements +""" +function forward_substitution!(F::ILUFactorization, y::AbstractVector) + L = F.L + @inbounds for col = 1 : L.n - 1 + for idx = L.colptr[col] : L.colptr[col + 1] - 1 + y[L.rowval[idx]] -= L.nzval[idx] * y[col] + end + end + + y +end + +function forward_substitution!(F::ILUFactorization, y::AbstractMatrix) + L = F.L + p = size(y, 2) + @inbounds for c = 1 : p + @inbounds for col = 1 : L.n - 1 + for idx = L.colptr[col] : L.colptr[col + 1] - 1 + y[L.rowval[idx],c] -= L.nzval[idx] * y[col,c] + end + end + end + + y +end + +function forward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector) + v .= y + forward_substitution!(F, v) +end + +function forward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix) + v .= y + forward_substitution!(F, v) +end + +function adjoint_forward_substitution!(F::ILUFactorization, y::AbstractVector) + U = F.U + @inbounds for col = 1 : U.n + # Final value for y[col] + y[col] /= U.nzval[U.colptr[col]] + + for idx = U.colptr[col] + 1 : U.colptr[col + 1] - 1 + y[U.rowval[idx]] -= U.nzval[idx] * y[col] + end + end + + y +end + +function adjoint_forward_substitution!(F::ILUFactorization, y::AbstractMatrix) + U = F.U + p = size(y, 2) + @inbounds for c = 1 : p + @inbounds for col = 1 : U.n + # Final value for y[col,c] + y[col,c] /= U.nzval[U.colptr[col]] + + for idx = U.colptr[col] + 1 : U.colptr[col + 1] - 1 + y[U.rowval[idx],c] -= U.nzval[idx] * y[col,c] + end + end + end + + y +end + +function adjoint_forward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector) + v .= y + adjoint_forward_substitution!(F, v) +end + +function adjoint_forward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix) + v .= y + adjoint_forward_substitution!(F, v) +end diff --git a/src/ilu/crout_ilu.jl b/src/ilu/crout_ilu.jl new file mode 100644 index 0000000..3ceadc9 --- /dev/null +++ b/src/ilu/crout_ilu.jl @@ -0,0 +1,113 @@ +export ilu + +function lutype(T::Type) + UT = typeof(oneunit(T) - oneunit(T) * (oneunit(T) / (oneunit(T) + zero(T)))) + LT = typeof(oneunit(UT) / oneunit(UT)) + S = promote_type(T, LT, UT) +end + +function ilu(A::SparseMatrixCSC{ATv,Ti}; τ = 1e-3) where {ATv,Ti} + n = size(A, 1) + + Tv = lutype(ATv) + + L = spzeros(Tv, Ti, n, n) + U = spzeros(Tv, Ti, n, n) + + U_row = SparseVectorAccumulator{Tv,Ti}(n) + L_col = SparseVectorAccumulator{Tv,Ti}(n) + + A_reader = RowReader(A) + L_reader = RowReader(L, Val{false}) + U_reader = RowReader(U, Val{false}) + + @inbounds for k = Ti(1) : Ti(n) + + ## + ## Copy the new row into U_row and the new column into L_col + ## + + col::Int = first_in_row(A_reader, k) + + while is_column(col) + add!(U_row, nzval(A_reader, col), col) + next_col = next_column(A_reader, col) + next_row!(A_reader, col) + + # Check if the next nonzero in this column + # is still above the diagonal + if has_next_nonzero(A_reader, col) && nzrow(A_reader, col) ≤ col + enqueue_next_nonzero!(A_reader, col) + end + + col = next_col + end + + # Copy the remaining part of the column into L_col + axpy!(one(Tv), A, k, nzidx(A_reader, k), L_col) + + ## + ## Combine the vectors: + ## + + # U_row[k:n] -= L[k,i] * U[i,k:n] for i = 1 : k - 1 + col = first_in_row(L_reader, k) + + while is_column(col) + axpy!(-nzval(L_reader, col), U, col, nzidx(U_reader, col), U_row) + + next_col = next_column(L_reader, col) + next_row!(L_reader, col) + + if has_next_nonzero(L_reader, col) + enqueue_next_nonzero!(L_reader, col) + end + + col = next_col + end + + # Nothing is happening here when k = n, maybe remove? + # L_col[k+1:n] -= U[i,k] * L[i,k+1:n] for i = 1 : k - 1 + if k < n + col = first_in_row(U_reader, k) + + while is_column(col) + axpy!(-nzval(U_reader, col), L, col, nzidx(L_reader, col), L_col) + + next_col = next_column(U_reader, col) + next_row!(U_reader, col) + + if has_next_nonzero(U_reader, col) + enqueue_next_nonzero!(U_reader, col) + end + + col = next_col + end + end + + ## + ## Apply a drop rule + ## + + U_diag_element = U_row.nzval[k] + # U_diag_element = U_row.values[k] + + # Append the columns + append_col!(U, U_row, k, τ) + append_col!(L, L_col, k, τ, inv(U_diag_element)) + + # Add the new row and column to U_nonzero_col, L_nonzero_row, U_first, L_first + # (First index *after* the diagonal) + U_reader.next_in_column[k] = U.colptr[k] + 1 + if U.colptr[k] < U.colptr[k + 1] - 1 + enqueue_next_nonzero!(U_reader, k) + end + + L_reader.next_in_column[k] = L.colptr[k] + if L.colptr[k] < L.colptr[k + 1] + enqueue_next_nonzero!(L_reader, k) + end + end + + return ILUFactorization(L, U) +end diff --git a/src/ilu/insertion_sort_update_vector.jl b/src/ilu/insertion_sort_update_vector.jl new file mode 100644 index 0000000..d49d16c --- /dev/null +++ b/src/ilu/insertion_sort_update_vector.jl @@ -0,0 +1,92 @@ +import Base: getindex, setindex!, empty!, Vector +import LinearAlgebra: axpy! + +""" +`InsertableSparseVector` accumulates the sparse vector +result from SpMV. Initialization requires O(N) work, +therefore the data structure is reused. Insertion +requires O(nnz) at worst, as insertion sort is used. +""" +struct InsertableSparseVector{Tv} + values::Vector{Tv} + indices::SortedSet + + InsertableSparseVector{Tv}(n::Int) where {Tv} = new(Vector{Tv}(undef, n), SortedSet(n)) +end + +@propagate_inbounds getindex(v::InsertableSparseVector{Tv}, idx::Int) where {Tv} = v.values[idx] +@propagate_inbounds setindex!(v::InsertableSparseVector{Tv}, value::Tv, idx::Int) where {Tv} = v.values[idx] = value +@inline indices(v::InsertableSparseVector) = Vector(v.indices) + +function Vector(v::InsertableSparseVector{Tv}) where {Tv} + vals = zeros(Tv, v.indices.N - 1) + for index in v.indices + @inbounds vals[index] = v.values[index] + end + return vals +end + +""" +Sets `v[idx] += a` when `idx` is occupied, or sets `v[idx] = a`. +Complexity is O(nnz). The `prev_idx` can be used to start the linear +search at `prev_idx`, useful when multiple already sorted values +are added. +""" +function add!(v::InsertableSparseVector, a, idx::Integer, prev_idx::Integer) + if push!(v.indices, idx, prev_idx) + @inbounds v[idx] = a + else + @inbounds v[idx] += a + end + + v +end + +""" +Add without providing a previous index. +""" +@propagate_inbounds add!(v::InsertableSparseVector, a, idx::Integer) = add!(v, a, idx, v.indices.N) + +function axpy!(a, A::SparseMatrixCSC, column::Integer, start::Integer, y::InsertableSparseVector) + prev_index = y.indices.N + + @inbounds for idx = start : A.colptr[column + 1] - 1 + add!(y, a * A.nzval[idx], A.rowval[idx], prev_index) + prev_index = A.rowval[idx] + end + + y +end + +""" +Empties the InsterableSparseVector in O(1) operations. +""" +@inline empty!(v::InsertableSparseVector) = empty!(v.indices) + +""" +Basically `A[:, j] = scale * drop(y)`, where drop removes +values less than `drop`. + +Resets the `InsertableSparseVector`. + +Note: does *not* update `A.colptr` for columns > j + 1, +as that is done during the steps. +""" +function append_col!(A::SparseMatrixCSC{Tv}, y::InsertableSparseVector{Tv}, j::Int, drop::Tv, scale::Tv = one(Tv)) where {Tv} + + total = 0 + + @inbounds for row = y.indices + if abs(y[row]) ≥ drop || row == j + push!(A.rowval, row) + push!(A.nzval, scale * y[row]) + total += 1 + end + end + + @inbounds A.colptr[j + 1] = A.colptr[j] + total + + empty!(y) + + nothing +end diff --git a/src/ilu/linked_list.jl b/src/ilu/linked_list.jl new file mode 100644 index 0000000..4048ed2 --- /dev/null +++ b/src/ilu/linked_list.jl @@ -0,0 +1,60 @@ +import Base: push! + +""" +The factor L is stored column-wise, but we need +all nonzeros in row `row`. We already keep track of +the first nonzero in each column (at most `n` indices). +Take `l = LinkedLists(n)`. Let `l.head[row]` be the column +of some nonzero in row `row`. Then we can store the column +of the next nonzero of row `row` in `l.next[l.head[row]]`, etc. +That "spot" is empty and there will never be a conflict +because as long as we only store the first nonzero per column: +the column is then a unique identifier. +""" +struct LinkedLists{Ti} + head::Vector{Ti} + next::Vector{Ti} +end + +LinkedLists{Ti}(n::Integer) where {Ti} = LinkedLists(zeros(Ti, n), zeros(Ti, n)) + +""" +For the L-factor: insert in row `head` column `value` +For the U-factor: insert in column `head` row `value` +""" +@propagate_inbounds function push!(l::LinkedLists, head::Integer, value::Integer) + l.head[head], l.next[value] = value, l.head[head] + return l +end + +struct RowReader{Tv,Ti} + A::SparseMatrixCSC{Tv,Ti} + next_in_column::Vector{Ti} + rows::LinkedLists{Ti} +end + +function RowReader(A::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} + n = size(A, 2) + @inbounds next_in_column = [A.colptr[i] for i = 1 : n] + rows = LinkedLists{Ti}(n) + @inbounds for i = Ti(1) : Ti(n) + push!(rows, A.rowval[A.colptr[i]], i) + end + return RowReader(A, next_in_column, rows) +end + +function RowReader(A::SparseMatrixCSC{Tv,Ti}, initialize::Type{Val{false}}) where {Tv,Ti} + n = size(A, 2) + return RowReader(A, zeros(Ti, n), LinkedLists{Ti}(n)) +end + +@propagate_inbounds nzidx(r::RowReader, column::Integer) = r.next_in_column[column] +@propagate_inbounds nzrow(r::RowReader, column::Integer) = r.A.rowval[nzidx(r, column)] +@propagate_inbounds nzval(r::RowReader, column::Integer) = r.A.nzval[nzidx(r, column)] + +@propagate_inbounds has_next_nonzero(r::RowReader, column::Integer) = nzidx(r, column) < r.A.colptr[column + 1] +@propagate_inbounds enqueue_next_nonzero!(r::RowReader, column::Integer) = push!(r.rows, nzrow(r, column), column) +@propagate_inbounds next_column(r::RowReader, column::Integer) = r.rows.next[column] +@propagate_inbounds first_in_row(r::RowReader, row::Integer) = r.rows.head[row] +@propagate_inbounds is_column(column::Integer) = column != 0 +@propagate_inbounds next_row!(r::RowReader, column::Integer) = r.next_in_column[column] += 1 diff --git a/src/ilu/sorted_set.jl b/src/ilu/sorted_set.jl new file mode 100644 index 0000000..09ae78c --- /dev/null +++ b/src/ilu/sorted_set.jl @@ -0,0 +1,78 @@ +import Base: iterate, push!, Vector, getindex, setindex!, show, empty! + +""" +SortedSet keeps track of a sorted set of integers ≤ N +using insertion sort with a linked list structure in a pre-allocated +vector. Requires O(N + 1) memory. Insertion goes via a linear scan in O(n) +where `n` is the number of stored elements, but can be accelerated +by passing along a known value in the set (which is useful when pushing +in an already sorted list). The insertion itself requires O(1) operations +due to the linked list structure. Provides iterators: +```julia +ints = SortedSet(10) +push!(ints, 5) +push!(ints, 3) +for value in ints + println(value) +end +``` +""" +struct SortedSet + next::Vector{Int} + N::Int + + function SortedSet(N::Int) + next = Vector{Int}(undef, N + 1) + @inbounds next[N + 1] = N + 1 + new(next, N + 1) + end +end + +# Convenience wrappers for indexing +@propagate_inbounds getindex(s::SortedSet, i::Int) = s.next[i] +@propagate_inbounds setindex!(s::SortedSet, value::Int, i::Int) = s.next[i] = value + +# Iterate in +@inline function iterate(s::SortedSet, p::Int = s.N) + @inbounds nxt = s[p] + return nxt == s.N ? nothing : (nxt, nxt) +end + +show(io::IO, s::SortedSet) = print(io, typeof(s), " with values ", Vector(s)) + +""" +For debugging and testing +""" +function Vector(s::SortedSet) + v = Int[] + for index in s + push!(v, index) + end + return v +end + +""" +Insert `index` after a known value `after` +""" +function push!(s::SortedSet, value::Int, after::Int) + @inbounds begin + while s[after] < value + after = s[after] + end + + if s[after] == value + return false + end + + s[after], s[value] = value, s[after] + + return true + end +end + +""" +Make the head pointer do a self-loop. +""" +@inline empty!(s::SortedSet) = s[s.N] = s.N + +@inline push!(s::SortedSet, index::Int) = push!(s, index, s.N) diff --git a/src/ilu/sparse_vector_accumulator.jl b/src/ilu/sparse_vector_accumulator.jl new file mode 100644 index 0000000..dd06fc0 --- /dev/null +++ b/src/ilu/sparse_vector_accumulator.jl @@ -0,0 +1,131 @@ +import Base: setindex!, empty!, Vector +import LinearAlgebra: axpy! + +""" +`SparseVectorAccumulator` accumulates the sparse vector +resulting from SpMV. Initialization requires O(N) work, +therefore the data structure is reused. Insertion is O(1). +Note that `nzind` is unordered. Also note that there is +wasted space: `nzind` could be a growing list. Pre-allocation +seems faster though. + +SparseVectorAccumulator incorporates the multiple switch technique +by Gustavson (1976), which makes resetting an O(1) operation rather +than O(nnz): the `curr` value is used to flag the occupied indices, +and `curr` is increased at each reset. + +occupied = [0, 1, 0, 1, 0, 0, 0] +nzind = [2, 4, 0, 0, 0, 0] +nzval = [0., .1234, 0., .435, 0., 0., 0.] +nnz = 2 +length = 7 +curr = 1 +""" +mutable struct SparseVectorAccumulator{Tv,Ti} + occupied::Vector{Ti} + nzind::Vector{Ti} + nzval::Vector{Tv} + nnz::Ti + length::Ti + curr::Ti + + return SparseVectorAccumulator{Tv,Ti}(N::Integer) where {Tv,Ti} = new( + zeros(Ti, N), + Vector{Ti}(undef, N), + Vector{Tv}(undef, N), + 0, + N, + 1 + ) +end + +function Vector(v::SparseVectorAccumulator{T}) where {T} + x = zeros(T, v.length) + @inbounds x[v.nzind[1 : v.nnz]] = v.nzval[v.nzind[1 : v.nnz]] + return x +end + +""" +Add a part of a SparseMatrixCSC column to a SparseVectorAccumulator, +starting at a given index until the end. +""" +function axpy!(a, A::SparseMatrixCSC, column, start, y::SparseVectorAccumulator) + # Loop over the whole column of A + @inbounds for idx = start : A.colptr[column + 1] - 1 + add!(y, a * A.nzval[idx], A.rowval[idx]) + end + + return y +end + +""" +Sets `v[idx] += a` when `idx` is occupied, or sets `v[idx] = a`. +Complexity is O(1). +""" +function add!(v::SparseVectorAccumulator, a, idx) + @inbounds begin + if isoccupied(v, idx) + v.nzval[idx] += a + else + v.nnz += 1 + v.occupied[idx] = v.curr + v.nzval[idx] = a + v.nzind[v.nnz] = idx + end + end + + return nothing +end + +""" +Check whether `idx` is nonzero. +""" +@propagate_inbounds isoccupied(v::SparseVectorAccumulator, idx::Integer) = v.occupied[idx] == v.curr + +""" +Empty the SparseVectorAccumulator in O(1) operations. +""" +@inline function empty!(v::SparseVectorAccumulator) + v.curr += 1 + v.nnz = 0 +end + +""" +Basically `A[:, j] = scale * drop(y)`, where drop removes +values less than `drop`. Note: sorts the `nzind`'s of `y`, +so that the column can be appended to a SparseMatrixCSC. + +Resets the `SparseVectorAccumulator`. + +Note: does *not* update `A.colptr` for columns > j + 1, +as that is done during the steps. +""" +function append_col!(A::SparseMatrixCSC, y::SparseVectorAccumulator, j::Integer, drop, scale = one(eltype(A))) + # Move the indices of interest up front + total = 0 + + @inbounds for idx = 1 : y.nnz + row = y.nzind[idx] + value = y.nzval[row] + + if abs(value) ≥ drop || row == j + total += 1 + y.nzind[total] = row + end + end + + # Sort the retained values. + sort!(y.nzind, 1, total, Base.Sort.QuickSort, Base.Order.Forward) + + @inbounds for idx = 1 : total + row = y.nzind[idx] + push!(A.rowval, row) + push!(A.nzval, scale * y.nzval[row]) + end + + @inbounds A.colptr[j + 1] = A.colptr[j] + total + + empty!(y) + + return nothing +end diff --git a/test/ilu/application.jl b/test/ilu/application.jl new file mode 100644 index 0000000..c63fa9c --- /dev/null +++ b/test/ilu/application.jl @@ -0,0 +1,235 @@ +using Test +using KrylovPreconditioners: ILUFactorization, forward_substitution!, backward_substitution! +using LinearAlgebra + +@testset "Forward and backward substitutions" begin + function test_fw_substitution(F::ILUFactorization) + A = F.L + n = size(A, 1) + x = rand(n) + y = copy(x) + v = zeros(n) + + forward_substitution!(v, F, x) + forward_substitution!(F, x) + ldiv!(UnitLowerTriangular(A), y) + + @test v ≈ y + @test x ≈ y + + x = rand(n, 5) + y = copy(x) + v = zeros(n, 5) + + forward_substitution!(v, F, x) + forward_substitution!(F, x) + ldiv!(UnitLowerTriangular(A), y) + + @test v ≈ y + @test x ≈ y + end + + function test_bw_substitution(F::ILUFactorization) + A = F.U + n = size(A, 1) + x = rand(n) + y = copy(x) + v = zeros(n) + + backward_substitution!(v, F, x) + backward_substitution!(F, x) + ldiv!(UpperTriangular(A'), y) + + @test v ≈ y + @test x ≈ y + + x = rand(n, 5) + y = copy(x) + v = zeros(n, 5) + + backward_substitution!(v, F, x) + backward_substitution!(F, x) + ldiv!(UpperTriangular(A'), y) + + @test v ≈ y + @test x ≈ y + end + + L = sparse(tril(rand(10, 10), -1)) + U = sparse(tril(rand(10, 10)) + 10I) + F = ILUFactorization(L, U) + test_fw_substitution(F) + test_bw_substitution(F) + + L = sparse(tril(tril(sprand(10, 10, .5), -1))) + U = sparse(tril(sprand(10, 10, .5) + 10I)) + F = ILUFactorization(L, U) + test_fw_substitution(F) + test_bw_substitution(F) + + L = spzeros(10, 10) + U = spzeros(10, 10) + 10I + F = ILUFactorization(L, U) + test_fw_substitution(F) + test_bw_substitution(F) +end + +@testset "Adjoint -- Forward and backward substitutions" begin + function test_adjoint_fw_substitution(F::ILUFactorization) + A = F.U + n = size(A, 1) + x = rand(n) + y = copy(x) + v = zeros(n) + + adjoint_forward_substitution!(v, F, x) + adjoint_forward_substitution!(F, x) + ldiv!(LowerTriangular(A), y) + + @test v ≈ y + @test x ≈ y + + x = rand(n, 5) + x2 = copy(x) + y = copy(x) + v = zeros(n, 5) + + adjoint_forward_substitution!(v, F, x) + adjoint_forward_substitution!(F, x) + ldiv!(LowerTriangular(A), y) + + @test v ≈ y + @test x ≈ y + end + + function test_adjoint_bw_substitution(F::ILUFactorization) + A = F.L + n = size(A, 1) + x = rand(n) + y = copy(x) + v = zeros(n) + + adjoint_backward_substitution!(v, F, x) + adjoint_backward_substitution!(F, x) + ldiv!(UnitLowerTriangular(A)', y) + + @test v ≈ y + @test x ≈ y + + x = rand(n, 5) + y = copy(x) + v = zeros(n, 5) + + adjoint_backward_substitution!(v, F, x) + adjoint_backward_substitution!(F, x) + ldiv!(UnitLowerTriangular(A)', y) + + @test v ≈ y + @test x ≈ y + end + + L = sparse(tril(rand(10, 10), -1)) + U = sparse(tril(rand(10, 10)) + 10I) + F = ILUFactorization(L, U) + test_adjoint_fw_substitution(F) + test_adjoint_bw_substitution(F) + + L = sparse(tril(tril(sprand(10, 10, .5), -1))) + U = sparse(tril(sprand(10, 10, .5) + 10I)) + F = ILUFactorization(L, U) + test_adjoint_fw_substitution(F) + test_adjoint_bw_substitution(F) + + L = spzeros(10, 10) + U = spzeros(10, 10) + 10I + F = ILUFactorization(L, U) + test_adjoint_fw_substitution(F) + test_adjoint_bw_substitution(F) +end + +@testset "ldiv!" begin + function test_ldiv!(L, U) + LU = ILUFactorization(L, U) + x = rand(size(LU.L, 1)) + y = copy(x) + z = copy(x) + w = copy(x) + + ldiv!(LU, x) + ldiv!(UnitLowerTriangular(LU.L), y) + ldiv!(UpperTriangular(LU.U'), y) + + @test x ≈ y + @test LU \ z == x + + ldiv!(w, LU, z) + + @test w == x + + x = rand(size(LU.L, 1), 5) + y = copy(x) + z = copy(x) + w = copy(x) + + ldiv!(LU, x) + ldiv!(UnitLowerTriangular(LU.L), y) + ldiv!(UpperTriangular(LU.U'), y) + + @test x ≈ y + @test LU \ z == x + + ldiv!(w, LU, z) + + @test w == x + end + + test_ldiv!(tril(sprand(10, 10, .5), -1), tril(sprand(10, 10, .5) + 10I)) +end + +@testset "Adjoint -- ldiv!" begin + function test_adjoint_ldiv!(L, U) + LU = ILUFactorization(L, U) + ALU = adjoint(LU) + + x = rand(size(LU.L, 1)) + y = copy(x) + z = copy(x) + w = copy(x) + + ldiv!(ALU, x) + ldiv!(LowerTriangular(LU.U), y) + ldiv!(UnitLowerTriangular(LU.L)', y) + + @test x ≈ y + @test ALU \ z == x + + ldiv!(w, ALU, z) + + @test w == x + + x = rand(size(LU.L, 1), 5) + y = copy(x) + z = copy(x) + w = copy(x) + + ldiv!(ALU, x) + ldiv!(LowerTriangular(LU.U), y) + ldiv!(UnitLowerTriangular(LU.L)', y) + + @test x ≈ y + @test ALU \ z == x + + ldiv!(w, ALU, z) + + @test w == x + end + + test_adjoint_ldiv!(tril(sprand(10, 10, .5), -1), tril(sprand(10, 10, .5) + 10I)) +end + +@testset "nnz" begin + L = tril(sprand(10, 10, .5), -1) + U = tril(sprand(10, 10, .5)) + 10I + LU = ILUFactorization(L, U) + @test nnz(LU) == nnz(L) + nnz(U) +end diff --git a/test/ilu/crout_ilu.jl b/test/ilu/crout_ilu.jl new file mode 100644 index 0000000..e10905c --- /dev/null +++ b/test/ilu/crout_ilu.jl @@ -0,0 +1,35 @@ +using Test +using SparseArrays +using LinearAlgebra + +@testset "Crout ILU" for Tv in (Float64, Float32, ComplexF64, ComplexF32), Ti in (Int64, Int32) + let + # Test if it performs full LU if droptol is zero + A = convert(SparseMatrixCSC{Tv, Ti}, sprand(Tv, 10, 10, .5) + 10I) + ilu = KrylovPreconditioners.ilu(A, τ = 0) + flu = lu(Matrix(A), NoPivot()) + + @test typeof(ilu) == KrylovPreconditioners.ILUFactorization{Tv,Ti} + @test Matrix(ilu.L + I) ≈ flu.L + @test Matrix(transpose(ilu.U)) ≈ flu.U + end + + let + # Test if L = I and U = diag(A) when the droptol is large. + A = convert(SparseMatrixCSC{Tv, Ti}, sprand(10, 10, .5) + 10I) + ilu = KrylovPreconditioners.ilu(A, τ = 1.0) + + @test nnz(ilu.L) == 0 + @test nnz(ilu.U) == 10 + @test diag(ilu.U) == diag(A) + end +end + +@testset "Crout ILU with integer matrix" begin + A = sparse(Int32(1):Int32(10), Int32(1):Int32(10), 1) + ilu = KrylovPreconditioners.ilu(A, τ = 0) + + @test typeof(ilu) == KrylovPreconditioners.ILUFactorization{Float64,Int32} + @test nnz(ilu.L) == 0 + @test diag(ilu.U) == diag(A) +end diff --git a/test/ilu/ilu.jl b/test/ilu/ilu.jl new file mode 100644 index 0000000..eda539a --- /dev/null +++ b/test/ilu/ilu.jl @@ -0,0 +1,6 @@ +include("sorted_set.jl") +include("linked_list.jl") +include("sparse_vector_accumulator.jl") +include("insertion_sort_update_vector.jl") +include("application.jl") +include("crout_ilu.jl") diff --git a/test/ilu/insertion_sort_update_vector.jl b/test/ilu/insertion_sort_update_vector.jl new file mode 100644 index 0000000..e7b64e0 --- /dev/null +++ b/test/ilu/insertion_sort_update_vector.jl @@ -0,0 +1,59 @@ +using Test + +using KrylovPreconditioners: InsertableSparseVector, add!, axpy!, append_col!, indices + +@testset "InsertableSparseVector" begin + @testset "Insertion sorted sparse vector" begin + v = InsertableSparseVector{Float64}(10) + + add!(v, 3.0, 6, 11) + add!(v, 3.0, 3, 11) + add!(v, 3.0, 3, 11) + + @test v[6] == 3.0 + @test v[3] == 6.0 + @test indices(v) == [3, 6] + end + + @testset "Add column of SparseMatrixCSC" begin + v = InsertableSparseVector{Float64}(5) + A = sprand(5, 5, 1.0) + axpy!(2., A, 3, A.colptr[3], v) + axpy!(3., A, 4, A.colptr[4], v) + @test Vector(v) == 2 * A[:, 3] + 3 * A[:, 4] + end + + @testset "Append column to SparseMatrixCSC" begin + A = spzeros(5, 5) + v = InsertableSparseVector{Float64}(5) + + add!(v, 0.3, 1) + add!(v, 0.009, 3) + add!(v, 0.12, 4) + add!(v, 0.007, 5) + append_col!(A, v, 1, 0.1) + + # Test whether the column is copied correctly + # and the dropping rule is applied + @test A[1, 1] == 0.3 + @test A[2, 1] == 0.0 # zero + @test A[3, 1] == 0.0 # dropped + @test A[4, 1] == 0.12 + @test A[5, 1] == 0.0 # dropped + + # Test whether the InsertableSparseVector is reset + # when reusing it for the second column. Also do + # scaling with a factor of 10. + add!(v, 0.5, 2) + add!(v, 0.009, 3) + add!(v, 0.5, 4) + add!(v, 0.007, 5) + append_col!(A, v, 2, 0.1, 10.0) + + @test A[1, 2] == 0.0 # zero + @test A[2, 2] == 5.0 # scaled + @test A[3, 2] == 0.0 # dropped + @test A[4, 2] == 5.0 # scaled + @test A[5, 2] == 0.0 # dropped + end +end diff --git a/test/ilu/linked_list.jl b/test/ilu/linked_list.jl new file mode 100644 index 0000000..2671a38 --- /dev/null +++ b/test/ilu/linked_list.jl @@ -0,0 +1,48 @@ +using Test +using KrylovPreconditioners: LinkedLists, RowReader, first_in_row, is_column, nzval, next_column, + next_row!, has_next_nonzero, enqueue_next_nonzero! +using SparseArrays + +@testset "Linked List" begin + n = 5 + let + lists = LinkedLists{Int}(n) + + # head[2] -> 5 -> nil + # head[5] -> 4 -> 3 -> nil + push!(lists, 5, 3) + push!(lists, 5, 4) + push!(lists, 2, 5) + + @test lists.head[5] == 4 + @test lists.next[4] == 3 + @test lists.next[3] == 0 + + @test lists.head[2] == 5 + @test lists.next[5] == 0 + end +end + +@testset "Read SparseMatrixCSC row by row" begin + # Read a sparse matrix row by row. + n = 10 + A = sprand(n, n, .5) + reader = RowReader(A) + + for row = 1 : n + column = first_in_row(reader, row) + + while is_column(column) + @test nzval(reader, column) == A[row, column] + + next_col = next_column(reader, column) + next_row!(reader, column) + + if has_next_nonzero(reader, column) + enqueue_next_nonzero!(reader, column) + end + + column = next_col + end + end +end diff --git a/test/ilu/sorted_set.jl b/test/ilu/sorted_set.jl new file mode 100644 index 0000000..bc30253 --- /dev/null +++ b/test/ilu/sorted_set.jl @@ -0,0 +1,41 @@ +using Test + +import KrylovPreconditioners: SortedSet, push! + +@testset "Sorted indices" begin + @testset "New values" begin + indices = SortedSet(10) + @test push!(indices, 5) + @test push!(indices, 7) + @test push!(indices, 4) + @test push!(indices, 6) + @test push!(indices, 8) + + as_vec = Vector(indices) + @test as_vec == [4, 5, 6, 7, 8] + end + + @testset "Duplicate values" begin + indices = SortedSet(10) + @test push!(indices, 3) + @test push!(indices, 3) == false + @test push!(indices, 8) + @test push!(indices, 8) == false + @test Vector(indices) == [3, 8] + end + + @testset "Quick insertion with known previous index" begin + indices = SortedSet(10) + @test push!(indices, 3) + @test push!(indices, 4, 3) + @test push!(indices, 8, 4) + @test Vector(indices) == [3, 4, 8] + end + + @testset "Pretty printing" begin + indices = SortedSet(10) + push!(indices, 3) + push!(indices, 2) + @test occursin("with values", sprint(show, indices)) + end +end diff --git a/test/ilu/sparse_vector_accumulator.jl b/test/ilu/sparse_vector_accumulator.jl new file mode 100644 index 0000000..06b5f90 --- /dev/null +++ b/test/ilu/sparse_vector_accumulator.jl @@ -0,0 +1,65 @@ +using KrylovPreconditioners: SparseVectorAccumulator, add!, append_col!, isoccupied +using LinearAlgebra + +@testset "SparseVectorAccumulator" for Ti in (Int32, Int64), Tv in (Float64, Float32) + @testset "Initialization" begin + v = SparseVectorAccumulator{Tv,Ti}(10) + @test iszero(v.nnz) + @test iszero(v.occupied) + end + + @testset "Add to SparseVectorAccumulator" begin + v = SparseVectorAccumulator{Tv,Ti}(3) + add!(v, Tv(1.0), Ti(3)) + add!(v, Tv(1.0), Ti(3)) + add!(v, Tv(3.0), Ti(2)) + @test v.nnz == 2 + @test isoccupied(v, 1) == false + @test isoccupied(v, 2) + @test isoccupied(v, 3) + @test Vector(v) == Tv[0.; 3.0; 2.0] + end + + @testset "Add column of SparseMatrixCSC" begin + # Copy all columns of a + v = SparseVectorAccumulator{Tv,Ti}(5) + A = convert(SparseMatrixCSC{Tv,Ti}, sprand(Tv, 5, 5, 1.0)) + axpy!(Tv(2), A, Ti(3), A.colptr[3], v) + axpy!(Tv(3), A, Ti(4), A.colptr[4], v) + @test Vector(v) == 2 * A[:, 3] + 3 * A[:, 4] + end + + @testset "Append column to SparseMatrixCSC" begin + A = spzeros(Tv, Ti, 5, 5) + v = SparseVectorAccumulator{Tv,Ti}(5) + + add!(v, Tv(0.3), Ti(1)) + add!(v, Tv(0.009), Ti(3)) + add!(v, Tv(0.12), Ti(4)) + add!(v, Tv(0.007), Ti(5)) + append_col!(A, v, Ti(1), Tv(0.1)) + + # Test whether the column is copied correctly + # and the dropping rule is applied + @test A[1, 1] == Tv(0.3) + @test A[2, 1] == Tv(0.0) # zero + @test A[3, 1] == Tv(0.0) # dropped + @test A[4, 1] == Tv(0.12) + @test A[5, 1] == Tv(0.0) # dropped + + # Test whether the InsertableSparseVector is reset + # when reusing it for the second column. Also do + # scaling with a factor of 10. + add!(v, Tv(0.5), Ti(2)) + add!(v, Tv(0.009), Ti(3)) + add!(v, Tv(0.5), Ti(4)) + add!(v, Tv(0.007), Ti(5)) + append_col!(A, v, Ti(2), Tv(0.1), Tv(10.0)) + + @test A[1, 2] == Tv(0.0) # zero + @test A[2, 2] == Tv(5.0) # scaled + @test A[3, 2] == Tv(0.0) # dropped + @test A[4, 2] == Tv(5.0) # scaled + @test A[5, 2] == Tv(0.0) # dropped + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 9a18c02..0092bf9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using AMDGPU using CUDA using oneAPI using Test +using KrylovPreconditioners @testset "KrylovPreconditioners" begin if AMDGPU.functional() @@ -24,4 +25,8 @@ if oneAPI.functional() include("gpu/intel.jl") end end + +@testset "IncompleteLU.jl" begin + include("ilu/ilu.jl") +end end