From 206ef971e4c20cc32516b148126a322dddf9cdf6 Mon Sep 17 00:00:00 2001
From: Alexis Montoison <35051714+amontoison@users.noreply.github.com>
Date: Wed, 25 Sep 2024 00:06:29 -0500
Subject: [PATCH] Add the code of IncompleteLU.jl in KrylovPreconditioners.jl
 (#56)

---
 .buildkite/pipeline.yml                  |  10 +-
 Project.toml                             |  16 +-
 src/KrylovPreconditioners.jl             |   1 +
 src/ilu/IncompleteLU.jl                  |  15 ++
 src/ilu/application.jl                   | 221 +++++++++++++++++++++
 src/ilu/crout_ilu.jl                     | 113 +++++++++++
 src/ilu/insertion_sort_update_vector.jl  |  92 +++++++++
 src/ilu/linked_list.jl                   |  60 ++++++
 src/ilu/sorted_set.jl                    |  78 ++++++++
 src/ilu/sparse_vector_accumulator.jl     | 131 +++++++++++++
 test/ilu/application.jl                  | 235 +++++++++++++++++++++++
 test/ilu/crout_ilu.jl                    |  35 ++++
 test/ilu/ilu.jl                          |   6 +
 test/ilu/insertion_sort_update_vector.jl |  59 ++++++
 test/ilu/linked_list.jl                  |  48 +++++
 test/ilu/sorted_set.jl                   |  41 ++++
 test/ilu/sparse_vector_accumulator.jl    |  65 +++++++
 test/runtests.jl                         |   5 +
 18 files changed, 1218 insertions(+), 13 deletions(-)
 create mode 100644 src/ilu/IncompleteLU.jl
 create mode 100644 src/ilu/application.jl
 create mode 100644 src/ilu/crout_ilu.jl
 create mode 100644 src/ilu/insertion_sort_update_vector.jl
 create mode 100644 src/ilu/linked_list.jl
 create mode 100644 src/ilu/sorted_set.jl
 create mode 100644 src/ilu/sparse_vector_accumulator.jl
 create mode 100644 test/ilu/application.jl
 create mode 100644 test/ilu/crout_ilu.jl
 create mode 100644 test/ilu/ilu.jl
 create mode 100644 test/ilu/insertion_sort_update_vector.jl
 create mode 100644 test/ilu/linked_list.jl
 create mode 100644 test/ilu/sorted_set.jl
 create mode 100644 test/ilu/sparse_vector_accumulator.jl

diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 878e0a3..2124645 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -2,7 +2,7 @@ steps:
   - label: "Nvidia GPUs -- CUDA.jl"
     plugins:
       - JuliaCI/julia#v1:
-          version: 1.9
+          version: "1.10"
     agents:
       queue: "juliagpu"
       cuda: "*"
@@ -20,7 +20,7 @@ steps:
   - label: "AMD GPUs -- AMDGPU.jl"
     plugins:
       - JuliaCI/julia#v1:
-          version: 1.9
+          version: "1.10"
     agents:
       queue: "juliagpu"
       rocm: "*"
@@ -44,7 +44,7 @@ steps:
   - label: "Intel GPUs -- oneAPI.jl"
     plugins:
       - JuliaCI/julia#v1:
-          version: 1.9
+          version: "1.10"
     agents:
       queue: "juliagpu"
       intel: "*"
@@ -52,8 +52,8 @@ steps:
       julia --color=yes --project=test/gpu -e '
         using Pkg
         Pkg.develop(path=".")
-        Pkg.add("oneAPI")
-        # Pkg.add(url="https://github.com/JuliaGPU/oneAPI.jl", rev="master")
+        # Pkg.add("oneAPI")
+        Pkg.add(url="https://github.com/JuliaGPU/oneAPI.jl", rev="master")
         Pkg.add("Krylov")
         Pkg.instantiate()
         include("test/gpu/intel.jl")'
diff --git a/Project.toml b/Project.toml
index 065c820..add609b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,19 +22,19 @@ KrylovPreconditionersCUDAExt = "CUDA"
 KrylovPreconditionersoneAPIExt = "oneAPI"
 
 [compat]
-AMDGPU = "0.9"
-Adapt = "3, 4"
+AMDGPU = "0.9, 1.0"
+Adapt = "4"
 CUDA = "5.3.0"
-oneAPI = "1.5.0"
+oneAPI = "1.6.0"
 KernelAbstractions = "0.9"
 Krylov = "0.9.4"
 LightGraphs = "1"
-LinearAlgebra = "1.9"
+LinearAlgebra = "1.10"
 Metis = "1"
-Random = "1.9"
-SparseArrays = "1.9"
-Test = "1.9"
-julia = "1.9"
+Random = "1.10"
+SparseArrays = "1.10"
+Test = "1.10"
+julia = "1.10"
 
 [extras]
 Krylov = "ba0b0d4f-ebba-5204-a429-3ac8c609bfb7"
diff --git a/src/KrylovPreconditioners.jl b/src/KrylovPreconditioners.jl
index 7497c80..59ddf5d 100644
--- a/src/KrylovPreconditioners.jl
+++ b/src/KrylovPreconditioners.jl
@@ -43,6 +43,7 @@ export TriangularOperator
 include("ic0.jl")
 include("ilu0.jl")
 include("blockjacobi.jl")
+include("ilu/IncompleteLU.jl")
 
 # Scaling
 include("scaling.jl")
diff --git a/src/ilu/IncompleteLU.jl b/src/ilu/IncompleteLU.jl
new file mode 100644
index 0000000..dbb22fa
--- /dev/null
+++ b/src/ilu/IncompleteLU.jl
@@ -0,0 +1,15 @@
+using LinearAlgebra: Factorization, AdjointFactorization, LowerTriangular, UnitLowerTriangular, UpperTriangular
+using SparseArrays
+using Base: @propagate_inbounds
+
+struct ILUFactorization{Tv,Ti} <: Factorization{Tv}
+    L::SparseMatrixCSC{Tv,Ti}
+    U::SparseMatrixCSC{Tv,Ti}
+end
+
+include("sorted_set.jl")
+include("linked_list.jl")
+include("sparse_vector_accumulator.jl")
+include("insertion_sort_update_vector.jl")
+include("application.jl")
+include("crout_ilu.jl")
diff --git a/src/ilu/application.jl b/src/ilu/application.jl
new file mode 100644
index 0000000..4fc02d4
--- /dev/null
+++ b/src/ilu/application.jl
@@ -0,0 +1,221 @@
+import SparseArrays: nnz
+import LinearAlgebra: ldiv!
+import Base.\
+
+export forward_substitution!, backward_substitution!
+export adjoint_forward_substitution!, adjoint_backward_substitution!
+
+"""
+Returns the number of nonzeros of the `L` and `U`
+factor combined.
+
+Excludes the unit diagonal of the `L` factor,
+which is not stored.
+"""
+nnz(F::ILUFactorization) = nnz(F.L) + nnz(F.U)
+
+function ldiv!(F::ILUFactorization, y::AbstractVecOrMat)
+    forward_substitution!(F, y)
+    backward_substitution!(F, y)
+end
+
+function ldiv!(F::AdjointFactorization{<:Any,<:ILUFactorization}, y::AbstractVecOrMat)
+    adjoint_forward_substitution!(F.parent, y)
+    adjoint_backward_substitution!(F.parent, y)
+end
+
+function ldiv!(y::AbstractVector, F::ILUFactorization, x::AbstractVector)
+    y .= x
+    ldiv!(F, y)
+end
+
+function ldiv!(y::AbstractVector, F::AdjointFactorization{<:Any,<:ILUFactorization}, x::AbstractVector)
+    y .= x
+    ldiv!(F, y)
+end
+
+function ldiv!(y::AbstractMatrix, F::ILUFactorization, x::AbstractMatrix)
+    y .= x
+    ldiv!(F, y)
+end
+
+function ldiv!(y::AbstractMatrix, F::AdjointFactorization{<:Any,<:ILUFactorization}, x::AbstractMatrix)
+    y .= x
+    ldiv!(F, y)
+end
+
+(\)(F::ILUFactorization, y::AbstractVecOrMat) = ldiv!(F, copy(y))
+(\)(F::AdjointFactorization{<:Any,<:ILUFactorization}, y::AbstractVecOrMat) = ldiv!(F, copy(y))
+
+"""
+Applies in-place backward substitution with the U factor of F, under the assumptions:
+
+1. U is stored transposed / row-wise
+2. U has no lower-triangular elements stored
+3. U has (nonzero) diagonal elements stored.
+"""
+function backward_substitution!(F::ILUFactorization, y::AbstractVector)
+    U = F.U
+    @inbounds for col = U.n : -1 : 1
+
+        # Substitutions
+        for idx = U.colptr[col + 1] - 1 : -1 : U.colptr[col] + 1
+            y[col] -= U.nzval[idx] * y[U.rowval[idx]]
+        end
+
+        # Final value for y[col]
+        y[col] /= U.nzval[U.colptr[col]]
+    end
+
+    y
+end
+
+function backward_substitution!(F::ILUFactorization, y::AbstractMatrix)
+    U = F.U
+    p = size(y, 2)
+
+    @inbounds for c = 1 : p
+        @inbounds for col = U.n : -1 : 1
+
+            # Substitutions
+            for idx = U.colptr[col + 1] - 1 : -1 : U.colptr[col] + 1
+                y[col,c] -= U.nzval[idx] * y[U.rowval[idx],c]
+            end
+
+            # Final value for y[col,c]
+            y[col,c] /= U.nzval[U.colptr[col]]
+        end
+    end
+
+    y
+end
+
+function backward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector)
+    v .= y
+    backward_substitution!(F, v)
+end
+
+function backward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix)
+    v .= y
+    backward_substitution!(F, v)
+end
+
+function adjoint_backward_substitution!(F::ILUFactorization, y::AbstractVector)
+    L = F.L
+    @inbounds for col = L.n - 1 : -1 : 1
+        # Substitutions
+        for idx = L.colptr[col + 1] - 1 : -1 : L.colptr[col]
+            y[col] -= L.nzval[idx] * y[L.rowval[idx]]
+        end
+    end
+
+    y
+end
+
+function adjoint_backward_substitution!(F::ILUFactorization, y::AbstractMatrix)
+    L = F.L
+    p = size(y, 2)
+    @inbounds for c = 1 : p
+        @inbounds for col = L.n - 1 : -1 : 1
+            # Substitutions
+            for idx = L.colptr[col + 1] - 1 : -1 : L.colptr[col]
+                y[col,c] -= L.nzval[idx] * y[L.rowval[idx],c]
+            end
+        end
+    end
+
+    y
+end
+
+function adjoint_backward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector)
+    v .= y
+    adjoint_backward_substitution!(F, v)
+end
+
+function adjoint_backward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix)
+    v .= y
+    adjoint_backward_substitution!(F, v)
+end
+
+"""
+Applies in-place forward substitution with the L factor of F, under the assumptions:
+
+1. L is stored column-wise (unlike U)
+2. L has no upper triangular elements
+3. L has *no* diagonal elements
+"""
+function forward_substitution!(F::ILUFactorization, y::AbstractVector)
+    L = F.L
+    @inbounds for col = 1 : L.n - 1
+        for idx = L.colptr[col] : L.colptr[col + 1] - 1
+            y[L.rowval[idx]] -= L.nzval[idx] * y[col]
+        end
+    end
+
+    y
+end
+
+function forward_substitution!(F::ILUFactorization, y::AbstractMatrix)
+    L = F.L
+    p = size(y, 2)
+    @inbounds for c = 1 : p
+        @inbounds for col = 1 : L.n - 1
+            for idx = L.colptr[col] : L.colptr[col + 1] - 1
+                y[L.rowval[idx],c] -= L.nzval[idx] * y[col,c]
+            end
+        end
+    end
+
+    y
+end
+
+function forward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector)
+    v .= y
+    forward_substitution!(F, v)
+end
+
+function forward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix)
+    v .= y
+    forward_substitution!(F, v)
+end
+
+function adjoint_forward_substitution!(F::ILUFactorization, y::AbstractVector)
+    U = F.U
+    @inbounds for col = 1 : U.n
+        # Final value for y[col]
+        y[col] /= U.nzval[U.colptr[col]]
+
+        for idx = U.colptr[col] + 1 : U.colptr[col + 1] - 1
+            y[U.rowval[idx]] -= U.nzval[idx] * y[col]
+        end
+    end
+
+    y
+end
+
+function adjoint_forward_substitution!(F::ILUFactorization, y::AbstractMatrix)
+    U = F.U
+    p = size(y, 2)
+    @inbounds for c = 1 : p
+        @inbounds for col = 1 : U.n
+            # Final value for y[col,c]
+            y[col,c] /= U.nzval[U.colptr[col]]
+
+            for idx = U.colptr[col] + 1 : U.colptr[col + 1] - 1
+                y[U.rowval[idx],c] -= U.nzval[idx] * y[col,c]
+            end
+        end
+    end
+
+    y
+end
+
+function adjoint_forward_substitution!(v::AbstractVector, F::ILUFactorization, y::AbstractVector)
+    v .= y
+    adjoint_forward_substitution!(F, v)
+end
+
+function adjoint_forward_substitution!(v::AbstractMatrix, F::ILUFactorization, y::AbstractMatrix)
+    v .= y
+    adjoint_forward_substitution!(F, v)
+end
diff --git a/src/ilu/crout_ilu.jl b/src/ilu/crout_ilu.jl
new file mode 100644
index 0000000..3ceadc9
--- /dev/null
+++ b/src/ilu/crout_ilu.jl
@@ -0,0 +1,113 @@
+export ilu
+
+function lutype(T::Type)
+    UT = typeof(oneunit(T) - oneunit(T) * (oneunit(T) / (oneunit(T) + zero(T))))
+    LT = typeof(oneunit(UT) / oneunit(UT))
+    S = promote_type(T, LT, UT)
+end
+
+function ilu(A::SparseMatrixCSC{ATv,Ti}; τ = 1e-3) where {ATv,Ti}
+    n = size(A, 1)
+
+    Tv = lutype(ATv)
+
+    L = spzeros(Tv, Ti, n, n)
+    U = spzeros(Tv, Ti, n, n)
+    
+    U_row = SparseVectorAccumulator{Tv,Ti}(n)
+    L_col = SparseVectorAccumulator{Tv,Ti}(n)
+
+    A_reader = RowReader(A)
+    L_reader = RowReader(L, Val{false})
+    U_reader = RowReader(U, Val{false})
+
+    @inbounds for k = Ti(1) : Ti(n)
+
+        ##
+        ## Copy the new row into U_row and the new column into L_col
+        ##
+ 
+        col::Int = first_in_row(A_reader, k)
+
+        while is_column(col)
+            add!(U_row, nzval(A_reader, col), col)
+            next_col = next_column(A_reader, col)
+            next_row!(A_reader, col)
+
+            # Check if the next nonzero in this column
+            # is still above the diagonal
+            if has_next_nonzero(A_reader, col) && nzrow(A_reader, col) ≤ col
+                enqueue_next_nonzero!(A_reader, col)
+            end
+
+            col = next_col
+        end
+
+        # Copy the remaining part of the column into L_col
+        axpy!(one(Tv), A, k, nzidx(A_reader, k), L_col)
+
+        ##
+        ## Combine the vectors:
+        ##
+        
+        # U_row[k:n] -= L[k,i] * U[i,k:n] for i = 1 : k - 1
+        col = first_in_row(L_reader, k)
+        
+        while is_column(col)
+            axpy!(-nzval(L_reader, col), U, col, nzidx(U_reader, col), U_row)
+
+            next_col = next_column(L_reader, col)
+            next_row!(L_reader, col)
+
+            if has_next_nonzero(L_reader, col)
+                enqueue_next_nonzero!(L_reader, col)
+            end
+
+            col = next_col
+        end
+        
+        # Nothing is happening here when k = n, maybe remove?
+        # L_col[k+1:n] -= U[i,k] * L[i,k+1:n] for i = 1 : k - 1
+        if k < n
+            col = first_in_row(U_reader, k)
+
+            while is_column(col)
+                axpy!(-nzval(U_reader, col), L, col, nzidx(L_reader, col), L_col)
+
+                next_col = next_column(U_reader, col)
+                next_row!(U_reader, col)
+
+                if has_next_nonzero(U_reader, col)
+                    enqueue_next_nonzero!(U_reader, col)
+                end
+
+                col = next_col
+            end
+        end
+
+        ## 
+        ## Apply a drop rule
+        ##
+
+        U_diag_element = U_row.nzval[k]
+        # U_diag_element = U_row.values[k]
+
+        # Append the columns        
+        append_col!(U, U_row, k, τ)
+        append_col!(L, L_col, k, τ, inv(U_diag_element))
+
+        # Add the new row and column to U_nonzero_col, L_nonzero_row, U_first, L_first
+        # (First index *after* the diagonal)
+        U_reader.next_in_column[k] = U.colptr[k] + 1
+        if U.colptr[k] < U.colptr[k + 1] - 1
+            enqueue_next_nonzero!(U_reader, k)
+        end
+
+        L_reader.next_in_column[k] = L.colptr[k]
+        if L.colptr[k] < L.colptr[k + 1]
+            enqueue_next_nonzero!(L_reader, k)
+        end
+    end
+
+    return ILUFactorization(L, U)
+end
diff --git a/src/ilu/insertion_sort_update_vector.jl b/src/ilu/insertion_sort_update_vector.jl
new file mode 100644
index 0000000..d49d16c
--- /dev/null
+++ b/src/ilu/insertion_sort_update_vector.jl
@@ -0,0 +1,92 @@
+import Base: getindex, setindex!, empty!, Vector
+import LinearAlgebra: axpy!
+
+"""
+`InsertableSparseVector` accumulates the sparse vector
+result from SpMV. Initialization requires O(N) work,
+therefore the data structure is reused. Insertion
+requires O(nnz) at worst, as insertion sort is used.
+"""
+struct InsertableSparseVector{Tv}
+    values::Vector{Tv}
+    indices::SortedSet
+
+    InsertableSparseVector{Tv}(n::Int) where {Tv} = new(Vector{Tv}(undef, n), SortedSet(n))
+end
+
+@propagate_inbounds getindex(v::InsertableSparseVector{Tv}, idx::Int) where {Tv} = v.values[idx]
+@propagate_inbounds setindex!(v::InsertableSparseVector{Tv}, value::Tv, idx::Int) where {Tv} = v.values[idx] = value
+@inline indices(v::InsertableSparseVector) = Vector(v.indices)
+
+function Vector(v::InsertableSparseVector{Tv}) where {Tv}
+    vals = zeros(Tv, v.indices.N - 1)
+    for index in v.indices
+        @inbounds vals[index] = v.values[index]
+    end
+    return vals
+end
+
+"""
+Sets `v[idx] += a` when `idx` is occupied, or sets `v[idx] = a`.
+Complexity is O(nnz). The `prev_idx` can be used to start the linear
+search at `prev_idx`, useful when multiple already sorted values
+are added.
+"""
+function add!(v::InsertableSparseVector, a, idx::Integer, prev_idx::Integer)
+    if push!(v.indices, idx, prev_idx)
+        @inbounds v[idx] = a
+    else
+        @inbounds v[idx] += a
+    end
+
+    v
+end
+
+"""
+Add without providing a previous index.
+"""
+@propagate_inbounds add!(v::InsertableSparseVector, a, idx::Integer) = add!(v, a, idx, v.indices.N)
+
+function axpy!(a, A::SparseMatrixCSC, column::Integer, start::Integer, y::InsertableSparseVector)
+    prev_index = y.indices.N
+    
+    @inbounds for idx = start : A.colptr[column + 1] - 1
+        add!(y, a * A.nzval[idx], A.rowval[idx], prev_index)
+        prev_index = A.rowval[idx]
+    end
+
+    y
+end
+
+"""
+Empties the InsterableSparseVector in O(1) operations.
+"""
+@inline empty!(v::InsertableSparseVector) = empty!(v.indices)
+
+"""
+Basically `A[:, j] = scale * drop(y)`, where drop removes
+values less than `drop`.
+
+Resets the `InsertableSparseVector`.
+
+Note: does *not* update `A.colptr` for columns > j + 1,
+as that is done during the steps.
+"""
+function append_col!(A::SparseMatrixCSC{Tv}, y::InsertableSparseVector{Tv}, j::Int, drop::Tv, scale::Tv = one(Tv)) where {Tv}
+    
+    total = 0
+    
+    @inbounds for row = y.indices
+        if abs(y[row]) ≥ drop || row == j
+            push!(A.rowval, row)
+            push!(A.nzval, scale * y[row])
+            total += 1
+        end
+    end
+
+    @inbounds A.colptr[j + 1] = A.colptr[j] + total
+    
+    empty!(y)
+
+    nothing
+end
diff --git a/src/ilu/linked_list.jl b/src/ilu/linked_list.jl
new file mode 100644
index 0000000..4048ed2
--- /dev/null
+++ b/src/ilu/linked_list.jl
@@ -0,0 +1,60 @@
+import Base: push!
+
+"""
+The factor L is stored column-wise, but we need
+all nonzeros in row `row`. We already keep track of 
+the first nonzero in each column (at most `n` indices).
+Take `l = LinkedLists(n)`. Let `l.head[row]` be the column
+of some nonzero in row `row`. Then we can store the column
+of the next nonzero of row `row` in `l.next[l.head[row]]`, etc.
+That "spot" is empty and there will never be a conflict
+because as long as we only store the first nonzero per column: 
+the column is then a unique identifier.
+"""
+struct LinkedLists{Ti}
+    head::Vector{Ti}
+    next::Vector{Ti}
+end
+
+LinkedLists{Ti}(n::Integer) where {Ti} = LinkedLists(zeros(Ti, n), zeros(Ti, n))
+
+"""
+For the L-factor: insert in row `head` column `value`
+For the U-factor: insert in column `head` row `value`
+"""
+@propagate_inbounds function push!(l::LinkedLists, head::Integer, value::Integer)
+    l.head[head], l.next[value] = value, l.head[head]
+    return l
+end
+
+struct RowReader{Tv,Ti}
+    A::SparseMatrixCSC{Tv,Ti}
+    next_in_column::Vector{Ti}
+    rows::LinkedLists{Ti}
+end
+
+function RowReader(A::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
+    n = size(A, 2)
+    @inbounds next_in_column = [A.colptr[i] for i = 1 : n]
+    rows = LinkedLists{Ti}(n)
+    @inbounds for i = Ti(1) : Ti(n)
+        push!(rows, A.rowval[A.colptr[i]], i)
+    end
+    return RowReader(A, next_in_column, rows)
+end
+
+function RowReader(A::SparseMatrixCSC{Tv,Ti}, initialize::Type{Val{false}}) where {Tv,Ti}
+    n = size(A, 2)
+    return RowReader(A, zeros(Ti, n), LinkedLists{Ti}(n))
+end
+
+@propagate_inbounds nzidx(r::RowReader, column::Integer) = r.next_in_column[column]
+@propagate_inbounds nzrow(r::RowReader, column::Integer) = r.A.rowval[nzidx(r, column)]
+@propagate_inbounds nzval(r::RowReader, column::Integer) = r.A.nzval[nzidx(r, column)]
+
+@propagate_inbounds has_next_nonzero(r::RowReader, column::Integer) = nzidx(r, column) < r.A.colptr[column + 1]
+@propagate_inbounds enqueue_next_nonzero!(r::RowReader, column::Integer) = push!(r.rows, nzrow(r, column), column)
+@propagate_inbounds next_column(r::RowReader, column::Integer) = r.rows.next[column]
+@propagate_inbounds first_in_row(r::RowReader, row::Integer) = r.rows.head[row]
+@propagate_inbounds is_column(column::Integer) = column != 0
+@propagate_inbounds next_row!(r::RowReader, column::Integer) = r.next_in_column[column] += 1
diff --git a/src/ilu/sorted_set.jl b/src/ilu/sorted_set.jl
new file mode 100644
index 0000000..09ae78c
--- /dev/null
+++ b/src/ilu/sorted_set.jl
@@ -0,0 +1,78 @@
+import Base: iterate, push!, Vector, getindex, setindex!, show, empty!
+
+"""
+SortedSet keeps track of a sorted set of integers ≤ N
+using insertion sort with a linked list structure in a pre-allocated 
+vector. Requires O(N + 1) memory. Insertion goes via a linear scan in O(n)
+where `n` is the number of stored elements, but can be accelerated 
+by passing along a known value in the set (which is useful when pushing
+in an already sorted list). The insertion itself requires O(1) operations
+due to the linked list structure. Provides iterators:
+```julia
+ints = SortedSet(10)
+push!(ints, 5)
+push!(ints, 3)
+for value in ints
+    println(value)
+end
+```
+"""
+struct SortedSet
+    next::Vector{Int}
+    N::Int
+
+    function SortedSet(N::Int)
+        next = Vector{Int}(undef, N + 1)
+        @inbounds next[N + 1] = N + 1
+        new(next, N + 1)
+    end
+end
+
+# Convenience wrappers for indexing
+@propagate_inbounds getindex(s::SortedSet, i::Int) = s.next[i]
+@propagate_inbounds setindex!(s::SortedSet, value::Int, i::Int) = s.next[i] = value
+
+# Iterate in 
+@inline function iterate(s::SortedSet, p::Int = s.N) 
+    @inbounds nxt = s[p]
+    return nxt == s.N ? nothing : (nxt, nxt)
+end
+
+show(io::IO, s::SortedSet) = print(io, typeof(s), " with values ", Vector(s))
+
+"""
+For debugging and testing
+"""
+function Vector(s::SortedSet)
+    v = Int[]
+    for index in s
+        push!(v, index)
+    end
+    return v
+end
+
+"""
+Insert `index` after a known value `after`
+"""
+function push!(s::SortedSet, value::Int, after::Int)
+    @inbounds begin
+        while s[after] < value
+            after = s[after]
+        end
+
+        if s[after] == value
+            return false
+        end
+        
+        s[after], s[value] = value, s[after]
+        
+        return true
+    end
+end
+
+"""
+Make the head pointer do a self-loop.
+"""
+@inline empty!(s::SortedSet) = s[s.N] = s.N
+
+@inline push!(s::SortedSet, index::Int) = push!(s, index, s.N)
diff --git a/src/ilu/sparse_vector_accumulator.jl b/src/ilu/sparse_vector_accumulator.jl
new file mode 100644
index 0000000..dd06fc0
--- /dev/null
+++ b/src/ilu/sparse_vector_accumulator.jl
@@ -0,0 +1,131 @@
+import Base: setindex!, empty!, Vector
+import LinearAlgebra: axpy!
+
+"""
+`SparseVectorAccumulator` accumulates the sparse vector
+resulting from SpMV. Initialization requires O(N) work,
+therefore the data structure is reused. Insertion is O(1).
+Note that `nzind` is unordered. Also note that there is
+wasted space: `nzind` could be a growing list. Pre-allocation
+seems faster though.
+
+SparseVectorAccumulator incorporates the multiple switch technique
+by Gustavson (1976), which makes resetting an O(1) operation rather
+than O(nnz): the `curr` value is used to flag the occupied indices,
+and `curr` is increased at each reset.
+
+occupied = [0, 1, 0, 1, 0, 0, 0]
+nzind = [2, 4, 0, 0, 0, 0]
+nzval = [0., .1234, 0., .435, 0., 0., 0.]
+nnz = 2
+length = 7
+curr = 1
+"""
+mutable struct SparseVectorAccumulator{Tv,Ti}
+    occupied::Vector{Ti}
+    nzind::Vector{Ti}
+    nzval::Vector{Tv}
+    nnz::Ti
+    length::Ti
+    curr::Ti
+
+    return SparseVectorAccumulator{Tv,Ti}(N::Integer) where {Tv,Ti} = new(
+        zeros(Ti, N),
+        Vector{Ti}(undef, N),
+        Vector{Tv}(undef, N),
+        0,
+        N,
+        1
+    )
+end
+
+function Vector(v::SparseVectorAccumulator{T}) where {T}
+    x = zeros(T, v.length)
+    @inbounds x[v.nzind[1 : v.nnz]] = v.nzval[v.nzind[1 : v.nnz]]
+    return x
+end
+
+"""
+Add a part of a SparseMatrixCSC column to a SparseVectorAccumulator,
+starting at a given index until the end.
+"""
+function axpy!(a, A::SparseMatrixCSC, column, start, y::SparseVectorAccumulator)
+    # Loop over the whole column of A
+    @inbounds for idx = start : A.colptr[column + 1] - 1
+        add!(y, a * A.nzval[idx], A.rowval[idx])
+    end
+
+    return y
+end
+
+"""
+Sets `v[idx] += a` when `idx` is occupied, or sets `v[idx] = a`.
+Complexity is O(1).
+"""
+function add!(v::SparseVectorAccumulator, a, idx)
+    @inbounds begin
+        if isoccupied(v, idx)
+            v.nzval[idx] += a
+        else
+            v.nnz += 1
+            v.occupied[idx] = v.curr
+            v.nzval[idx] = a
+            v.nzind[v.nnz] = idx
+        end
+    end
+
+    return nothing
+end
+
+"""
+Check whether `idx` is nonzero.
+"""
+@propagate_inbounds isoccupied(v::SparseVectorAccumulator, idx::Integer) = v.occupied[idx] == v.curr
+
+"""
+Empty the SparseVectorAccumulator in O(1) operations.
+"""
+@inline function empty!(v::SparseVectorAccumulator)
+    v.curr += 1
+    v.nnz = 0
+end
+
+"""
+Basically `A[:, j] = scale * drop(y)`, where drop removes
+values less than `drop`. Note: sorts the `nzind`'s of `y`, 
+so that the column can be appended to a SparseMatrixCSC.
+
+Resets the `SparseVectorAccumulator`.
+
+Note: does *not* update `A.colptr` for columns > j + 1,
+as that is done during the steps.
+"""
+function append_col!(A::SparseMatrixCSC, y::SparseVectorAccumulator, j::Integer, drop, scale = one(eltype(A)))
+    # Move the indices of interest up front
+    total = 0
+
+    @inbounds for idx = 1 : y.nnz
+        row = y.nzind[idx]
+        value = y.nzval[row]
+
+        if abs(value) ≥ drop || row == j
+            total += 1
+            y.nzind[total] = row
+        end
+    end
+
+    # Sort the retained values.
+    sort!(y.nzind, 1, total, Base.Sort.QuickSort, Base.Order.Forward)
+    
+    @inbounds for idx = 1 : total
+        row = y.nzind[idx]
+        push!(A.rowval, row)
+        push!(A.nzval, scale * y.nzval[row])
+    end
+
+    @inbounds A.colptr[j + 1] = A.colptr[j] + total
+    
+    empty!(y)
+
+    return nothing
+end
diff --git a/test/ilu/application.jl b/test/ilu/application.jl
new file mode 100644
index 0000000..c63fa9c
--- /dev/null
+++ b/test/ilu/application.jl
@@ -0,0 +1,235 @@
+using Test
+using KrylovPreconditioners: ILUFactorization, forward_substitution!, backward_substitution!
+using LinearAlgebra
+
+@testset "Forward and backward substitutions" begin
+    function test_fw_substitution(F::ILUFactorization)
+        A = F.L
+        n = size(A, 1)
+        x = rand(n)
+        y = copy(x)
+        v = zeros(n)
+
+        forward_substitution!(v, F, x)
+        forward_substitution!(F, x)
+        ldiv!(UnitLowerTriangular(A), y)
+
+        @test v ≈ y
+        @test x ≈ y
+
+        x = rand(n, 5)
+        y = copy(x)
+        v = zeros(n, 5)
+
+        forward_substitution!(v, F, x)
+        forward_substitution!(F, x)
+        ldiv!(UnitLowerTriangular(A), y)
+
+        @test v ≈ y
+        @test x ≈ y
+    end
+
+    function test_bw_substitution(F::ILUFactorization)
+        A = F.U
+        n = size(A, 1)
+        x = rand(n)
+        y = copy(x)
+        v = zeros(n)
+
+        backward_substitution!(v, F, x)
+        backward_substitution!(F, x)
+        ldiv!(UpperTriangular(A'), y)
+
+        @test v ≈ y
+        @test x ≈ y
+
+        x = rand(n, 5)
+        y = copy(x)
+        v = zeros(n, 5)
+
+        backward_substitution!(v, F, x)
+        backward_substitution!(F, x)
+        ldiv!(UpperTriangular(A'), y)
+
+        @test v ≈ y
+        @test x ≈ y
+    end
+
+    L = sparse(tril(rand(10, 10), -1))
+    U = sparse(tril(rand(10, 10)) + 10I)
+    F = ILUFactorization(L, U)
+    test_fw_substitution(F)
+    test_bw_substitution(F)
+
+    L = sparse(tril(tril(sprand(10, 10, .5), -1)))
+    U = sparse(tril(sprand(10, 10, .5) + 10I))
+    F = ILUFactorization(L, U)
+    test_fw_substitution(F)
+    test_bw_substitution(F)
+
+    L = spzeros(10, 10)
+    U = spzeros(10, 10) + 10I
+    F = ILUFactorization(L, U)
+    test_fw_substitution(F)
+    test_bw_substitution(F)
+end
+
+@testset "Adjoint -- Forward and backward substitutions" begin
+    function test_adjoint_fw_substitution(F::ILUFactorization)
+        A = F.U
+        n = size(A, 1)
+        x = rand(n)
+        y = copy(x)
+        v = zeros(n)
+
+        adjoint_forward_substitution!(v, F, x)
+        adjoint_forward_substitution!(F, x)
+        ldiv!(LowerTriangular(A), y)
+
+        @test v ≈ y
+        @test x ≈ y
+
+        x = rand(n, 5)
+        x2 = copy(x)
+        y = copy(x)
+        v = zeros(n, 5)
+
+        adjoint_forward_substitution!(v, F, x)
+        adjoint_forward_substitution!(F, x)
+        ldiv!(LowerTriangular(A), y)
+
+        @test v ≈ y
+        @test x ≈ y
+    end
+
+    function test_adjoint_bw_substitution(F::ILUFactorization)
+        A = F.L
+        n = size(A, 1)
+        x = rand(n)
+        y = copy(x)
+        v = zeros(n)
+
+        adjoint_backward_substitution!(v, F, x)
+        adjoint_backward_substitution!(F, x)
+        ldiv!(UnitLowerTriangular(A)', y)
+
+        @test v ≈ y
+        @test x ≈ y
+
+        x = rand(n, 5)
+        y = copy(x)
+        v = zeros(n, 5)
+
+        adjoint_backward_substitution!(v, F, x)
+        adjoint_backward_substitution!(F, x)
+        ldiv!(UnitLowerTriangular(A)', y)
+
+        @test v ≈ y
+        @test x ≈ y
+    end
+
+    L = sparse(tril(rand(10, 10), -1))
+    U = sparse(tril(rand(10, 10)) + 10I)
+    F = ILUFactorization(L, U)
+    test_adjoint_fw_substitution(F)
+    test_adjoint_bw_substitution(F)
+
+    L = sparse(tril(tril(sprand(10, 10, .5), -1)))
+    U = sparse(tril(sprand(10, 10, .5) + 10I))
+    F = ILUFactorization(L, U)
+    test_adjoint_fw_substitution(F)
+    test_adjoint_bw_substitution(F)
+
+    L = spzeros(10, 10)
+    U = spzeros(10, 10) + 10I
+    F = ILUFactorization(L, U)
+    test_adjoint_fw_substitution(F)
+    test_adjoint_bw_substitution(F)
+end
+
+@testset "ldiv!" begin
+    function test_ldiv!(L, U)
+        LU = ILUFactorization(L, U)
+        x = rand(size(LU.L, 1))
+        y = copy(x)
+        z = copy(x)
+        w = copy(x)
+
+        ldiv!(LU, x)
+        ldiv!(UnitLowerTriangular(LU.L), y)
+        ldiv!(UpperTriangular(LU.U'), y)
+
+        @test x ≈ y
+        @test LU \ z == x
+
+        ldiv!(w, LU, z)
+
+        @test w == x
+
+        x = rand(size(LU.L, 1), 5)
+        y = copy(x)
+        z = copy(x)
+        w = copy(x)
+
+        ldiv!(LU, x)
+        ldiv!(UnitLowerTriangular(LU.L), y)
+        ldiv!(UpperTriangular(LU.U'), y)
+
+        @test x ≈ y
+        @test LU \ z == x
+
+        ldiv!(w, LU, z)
+
+        @test w == x
+    end
+
+    test_ldiv!(tril(sprand(10, 10, .5), -1), tril(sprand(10, 10, .5) + 10I))
+end
+
+@testset "Adjoint -- ldiv!" begin
+    function test_adjoint_ldiv!(L, U)
+        LU = ILUFactorization(L, U)
+        ALU = adjoint(LU)
+
+        x = rand(size(LU.L, 1))
+        y = copy(x)
+        z = copy(x)
+        w = copy(x)
+
+        ldiv!(ALU, x)
+        ldiv!(LowerTriangular(LU.U), y)
+        ldiv!(UnitLowerTriangular(LU.L)', y)
+
+        @test x ≈ y
+        @test ALU \ z == x
+
+        ldiv!(w, ALU, z)
+
+        @test w == x
+
+        x = rand(size(LU.L, 1), 5)
+        y = copy(x)
+        z = copy(x)
+        w = copy(x)
+
+        ldiv!(ALU, x)
+        ldiv!(LowerTriangular(LU.U), y)
+        ldiv!(UnitLowerTriangular(LU.L)', y)
+
+        @test x ≈ y
+        @test ALU \ z == x
+
+        ldiv!(w, ALU, z)
+
+        @test w == x
+    end
+
+    test_adjoint_ldiv!(tril(sprand(10, 10, .5), -1), tril(sprand(10, 10, .5) + 10I))
+end
+
+@testset "nnz" begin
+    L = tril(sprand(10, 10, .5), -1)
+    U = tril(sprand(10, 10, .5)) + 10I
+    LU = ILUFactorization(L, U)
+    @test nnz(LU) == nnz(L) + nnz(U)
+end
diff --git a/test/ilu/crout_ilu.jl b/test/ilu/crout_ilu.jl
new file mode 100644
index 0000000..e10905c
--- /dev/null
+++ b/test/ilu/crout_ilu.jl
@@ -0,0 +1,35 @@
+using Test
+using SparseArrays
+using LinearAlgebra
+
+@testset "Crout ILU" for Tv in (Float64, Float32, ComplexF64, ComplexF32), Ti in (Int64, Int32)
+    let
+        # Test if it performs full LU if droptol is zero
+        A = convert(SparseMatrixCSC{Tv, Ti}, sprand(Tv, 10, 10, .5) + 10I)
+        ilu = KrylovPreconditioners.ilu(A, τ = 0)
+        flu = lu(Matrix(A), NoPivot())
+
+        @test typeof(ilu) == KrylovPreconditioners.ILUFactorization{Tv,Ti}
+        @test Matrix(ilu.L + I) ≈ flu.L
+        @test Matrix(transpose(ilu.U)) ≈ flu.U
+    end
+
+    let
+        # Test if L = I and U = diag(A) when the droptol is large.
+        A = convert(SparseMatrixCSC{Tv, Ti}, sprand(10, 10, .5) + 10I)
+        ilu = KrylovPreconditioners.ilu(A, τ = 1.0)
+
+        @test nnz(ilu.L) == 0
+        @test nnz(ilu.U) == 10
+        @test diag(ilu.U) == diag(A)
+    end
+end
+
+@testset "Crout ILU with integer matrix" begin
+    A = sparse(Int32(1):Int32(10), Int32(1):Int32(10), 1)
+    ilu = KrylovPreconditioners.ilu(A, τ = 0)
+
+    @test typeof(ilu) == KrylovPreconditioners.ILUFactorization{Float64,Int32}
+    @test nnz(ilu.L) == 0
+    @test diag(ilu.U) == diag(A)
+end
diff --git a/test/ilu/ilu.jl b/test/ilu/ilu.jl
new file mode 100644
index 0000000..eda539a
--- /dev/null
+++ b/test/ilu/ilu.jl
@@ -0,0 +1,6 @@
+include("sorted_set.jl")
+include("linked_list.jl")
+include("sparse_vector_accumulator.jl")
+include("insertion_sort_update_vector.jl")
+include("application.jl")
+include("crout_ilu.jl")
diff --git a/test/ilu/insertion_sort_update_vector.jl b/test/ilu/insertion_sort_update_vector.jl
new file mode 100644
index 0000000..e7b64e0
--- /dev/null
+++ b/test/ilu/insertion_sort_update_vector.jl
@@ -0,0 +1,59 @@
+using Test
+
+using KrylovPreconditioners: InsertableSparseVector, add!, axpy!, append_col!, indices
+
+@testset "InsertableSparseVector" begin
+    @testset "Insertion sorted sparse vector" begin
+        v = InsertableSparseVector{Float64}(10)
+
+        add!(v, 3.0, 6, 11)
+        add!(v, 3.0, 3, 11)
+        add!(v, 3.0, 3, 11)
+
+        @test v[6] == 3.0
+        @test v[3] == 6.0
+        @test indices(v) == [3, 6]
+    end
+
+    @testset "Add column of SparseMatrixCSC" begin
+        v = InsertableSparseVector{Float64}(5)
+        A = sprand(5, 5, 1.0)
+        axpy!(2., A, 3, A.colptr[3], v)
+        axpy!(3., A, 4, A.colptr[4], v)
+        @test Vector(v) == 2 * A[:, 3] + 3 * A[:, 4]
+    end
+
+    @testset "Append column to SparseMatrixCSC" begin
+        A = spzeros(5, 5)
+        v = InsertableSparseVector{Float64}(5)
+
+        add!(v, 0.3, 1)
+        add!(v, 0.009, 3)
+        add!(v, 0.12, 4)
+        add!(v, 0.007, 5)
+        append_col!(A, v, 1, 0.1)
+
+        # Test whether the column is copied correctly
+        # and the dropping rule is applied
+        @test A[1, 1] == 0.3
+        @test A[2, 1] == 0.0 # zero
+        @test A[3, 1] == 0.0 # dropped
+        @test A[4, 1] == 0.12
+        @test A[5, 1] == 0.0 # dropped
+
+        # Test whether the InsertableSparseVector is reset
+        # when reusing it for the second column. Also do
+        # scaling with a factor of 10.
+        add!(v, 0.5, 2)
+        add!(v, 0.009, 3)
+        add!(v, 0.5, 4)
+        add!(v, 0.007, 5)
+        append_col!(A, v, 2, 0.1, 10.0)
+
+        @test A[1, 2] == 0.0 # zero
+        @test A[2, 2] == 5.0 # scaled
+        @test A[3, 2] == 0.0 # dropped
+        @test A[4, 2] == 5.0 # scaled
+        @test A[5, 2] == 0.0 # dropped
+    end
+end
diff --git a/test/ilu/linked_list.jl b/test/ilu/linked_list.jl
new file mode 100644
index 0000000..2671a38
--- /dev/null
+++ b/test/ilu/linked_list.jl
@@ -0,0 +1,48 @@
+using Test
+using KrylovPreconditioners: LinkedLists, RowReader, first_in_row, is_column, nzval, next_column, 
+                    next_row!, has_next_nonzero, enqueue_next_nonzero!
+using SparseArrays
+
+@testset "Linked List" begin
+    n = 5
+    let
+        lists = LinkedLists{Int}(n)
+
+        # head[2] -> 5 -> nil
+        # head[5] -> 4 -> 3 -> nil
+        push!(lists, 5, 3)
+        push!(lists, 5, 4)
+        push!(lists, 2, 5)
+
+        @test lists.head[5] == 4
+        @test lists.next[4] == 3
+        @test lists.next[3] == 0
+
+        @test lists.head[2] == 5
+        @test lists.next[5] == 0
+    end
+end
+
+@testset "Read SparseMatrixCSC row by row" begin
+    # Read a sparse matrix row by row.
+    n = 10
+    A = sprand(n, n, .5)
+    reader = RowReader(A)
+
+    for row = 1 : n
+        column = first_in_row(reader, row)
+
+        while is_column(column)
+            @test nzval(reader, column) == A[row, column]
+
+            next_col = next_column(reader, column)
+            next_row!(reader, column)
+
+            if has_next_nonzero(reader, column)
+                enqueue_next_nonzero!(reader, column)
+            end
+
+            column = next_col
+        end
+    end
+end
diff --git a/test/ilu/sorted_set.jl b/test/ilu/sorted_set.jl
new file mode 100644
index 0000000..bc30253
--- /dev/null
+++ b/test/ilu/sorted_set.jl
@@ -0,0 +1,41 @@
+using Test
+
+import KrylovPreconditioners: SortedSet, push!
+
+@testset "Sorted indices" begin
+    @testset "New values" begin
+        indices = SortedSet(10)
+        @test push!(indices, 5)
+        @test push!(indices, 7)
+        @test push!(indices, 4)
+        @test push!(indices, 6)
+        @test push!(indices, 8)
+
+        as_vec = Vector(indices)
+        @test as_vec == [4, 5, 6, 7, 8]
+    end
+
+    @testset "Duplicate values" begin
+        indices = SortedSet(10)
+        @test push!(indices, 3)
+        @test push!(indices, 3) == false
+        @test push!(indices, 8)
+        @test push!(indices, 8) == false
+        @test Vector(indices) == [3, 8]
+    end
+
+    @testset "Quick insertion with known previous index" begin
+        indices = SortedSet(10)
+        @test push!(indices, 3)
+        @test push!(indices, 4, 3)
+        @test push!(indices, 8, 4)
+        @test Vector(indices) == [3, 4, 8]
+    end
+
+    @testset "Pretty printing" begin
+        indices = SortedSet(10)
+        push!(indices, 3)
+        push!(indices, 2)
+        @test occursin("with values", sprint(show, indices))
+    end
+end
diff --git a/test/ilu/sparse_vector_accumulator.jl b/test/ilu/sparse_vector_accumulator.jl
new file mode 100644
index 0000000..06b5f90
--- /dev/null
+++ b/test/ilu/sparse_vector_accumulator.jl
@@ -0,0 +1,65 @@
+using KrylovPreconditioners: SparseVectorAccumulator, add!, append_col!, isoccupied
+using LinearAlgebra
+
+@testset "SparseVectorAccumulator" for Ti in (Int32, Int64), Tv in (Float64, Float32)
+    @testset "Initialization" begin
+        v = SparseVectorAccumulator{Tv,Ti}(10)
+        @test iszero(v.nnz)
+        @test iszero(v.occupied)
+    end
+
+    @testset "Add to SparseVectorAccumulator" begin
+        v = SparseVectorAccumulator{Tv,Ti}(3)
+        add!(v, Tv(1.0), Ti(3))
+        add!(v, Tv(1.0), Ti(3))
+        add!(v, Tv(3.0), Ti(2))
+        @test v.nnz == 2
+        @test isoccupied(v, 1) == false
+        @test isoccupied(v, 2)
+        @test isoccupied(v, 3)
+        @test Vector(v) == Tv[0.; 3.0; 2.0]
+    end
+
+    @testset "Add column of SparseMatrixCSC" begin
+        # Copy all columns of a 
+        v = SparseVectorAccumulator{Tv,Ti}(5)
+        A = convert(SparseMatrixCSC{Tv,Ti}, sprand(Tv, 5, 5, 1.0))
+        axpy!(Tv(2), A, Ti(3), A.colptr[3], v)
+        axpy!(Tv(3), A, Ti(4), A.colptr[4], v)
+        @test Vector(v) == 2 * A[:, 3] + 3 * A[:, 4]
+    end
+
+    @testset "Append column to SparseMatrixCSC" begin
+        A = spzeros(Tv, Ti, 5, 5)
+        v = SparseVectorAccumulator{Tv,Ti}(5)
+
+        add!(v, Tv(0.3), Ti(1))
+        add!(v, Tv(0.009), Ti(3))
+        add!(v, Tv(0.12), Ti(4))
+        add!(v, Tv(0.007), Ti(5))
+        append_col!(A, v, Ti(1), Tv(0.1))
+
+        # Test whether the column is copied correctly
+        # and the dropping rule is applied
+        @test A[1, 1] == Tv(0.3)
+        @test A[2, 1] == Tv(0.0) # zero
+        @test A[3, 1] == Tv(0.0) # dropped
+        @test A[4, 1] == Tv(0.12)
+        @test A[5, 1] == Tv(0.0) # dropped
+
+        # Test whether the InsertableSparseVector is reset
+        # when reusing it for the second column. Also do
+        # scaling with a factor of 10.
+        add!(v, Tv(0.5), Ti(2))
+        add!(v, Tv(0.009), Ti(3))
+        add!(v, Tv(0.5), Ti(4))
+        add!(v, Tv(0.007), Ti(5))
+        append_col!(A, v, Ti(2), Tv(0.1), Tv(10.0))
+
+        @test A[1, 2] == Tv(0.0) # zero
+        @test A[2, 2] == Tv(5.0) # scaled
+        @test A[3, 2] == Tv(0.0) # dropped
+        @test A[4, 2] == Tv(5.0) # scaled
+        @test A[5, 2] == Tv(0.0) # dropped
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 9a18c02..0092bf9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -2,6 +2,7 @@ using AMDGPU
 using CUDA
 using oneAPI
 using Test
+using KrylovPreconditioners
 
 @testset "KrylovPreconditioners" begin
 if AMDGPU.functional()
@@ -24,4 +25,8 @@ if oneAPI.functional()
         include("gpu/intel.jl")
     end
 end
+
+@testset "IncompleteLU.jl" begin
+    include("ilu/ilu.jl")
+end
 end