diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 46f0e5e8..4e3e2d93 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -6,10 +6,18 @@ env: CLIMATEMACHINE_SETTINGS_FIX_RNG_SEED: "true" steps: - - label: "init perf env" - key: "init_perf_env" + - label: "init cpu env" + key: "init_cpu_env" command: - "echo $JULIA_DEPOT_PATH" + + - echo "--- Instantiate test" + - "julia --project=test -e 'using Pkg; Pkg.develop(path=\".\")'" + - "julia --project=test -e 'using Pkg; Pkg.instantiate(;verbose=true)'" + - "julia --project=test -e 'using Pkg; Pkg.precompile(;strict=true)'" + - "julia --project=test -e 'using Pkg; Pkg.status()'" + + - echo "--- Instantiate perf" - "julia --project=perf -e 'using Pkg; Pkg.instantiate(;verbose=true)'" - "julia --project=perf -e 'using Pkg; Pkg.precompile()'" - "julia --project=perf -e 'using Pkg; Pkg.status()'" @@ -35,6 +43,15 @@ steps: - wait + - label: "Unit tests and plots" + command: - "julia --project -e 'using Pkg; Pkg.test()'" + artifact_paths: "test/output/*" + agents: + config: cpu + queue: central + slurm_ntasks: 1 + - label: "GPU tests" command: - "julia --project -e 'using Pkg; Pkg.test(;test_args=[\"CuArray\"])'" diff --git a/src/ClimaTimeSteppers.jl b/src/ClimaTimeSteppers.jl index 46344e7c..feb758f6 100644 --- a/src/ClimaTimeSteppers.jl +++ b/src/ClimaTimeSteppers.jl @@ -70,13 +70,17 @@ end SciMLBase.allowscomplex(alg::DistributedODEAlgorithm) = true include("integrators.jl") +include("solvers/matrix_utilities.jl") +include("solvers/update_signal_handler.jl") include("solvers/convergence_condition.jl") include("solvers/convergence_checker.jl") include("solvers/newtons_method.jl") include("solvers/imex_ark.jl") +include("solvers/rosenbrock.jl") # Include concrete implementations include("solvers/imex_ark_tableaus.jl") +include("solvers/rosenbrock_tableaus.jl") include("solvers/multirate.jl") include("solvers/lsrk.jl") include("solvers/ssprk.jl") @@ -84,7 +88,6 @@ include("solvers/ark.jl") # include("solvers/ars.jl") # previous implementations of ARS schemes include("solvers/mis.jl") include("solvers/wickerskamarock.jl") -include("solvers/rosenbrock.jl") include("callbacks.jl") diff --git a/src/functions.jl b/src/functions.jl index cd0665bd..f23f5de3 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -9,9 +9,19 @@ un .= u .+ dt * f(u, p, t) ``` """ -struct ForwardEulerODEFunction{F} <: DiffEqBase.AbstractODEFunction{true} +struct ForwardEulerODEFunction{F, J, W, T} <: + DiffEqBase.AbstractODEFunction{true} f::F + jac_prototype::J + Wfact::W + tgrad::T end +ForwardEulerODEFunction( + f; + jac_prototype = nothing, + Wfact = nothing, + tgrad = nothing, +) = ForwardEulerODEFunction(f, jac_prototype, Wfact, tgrad) (f::ForwardEulerODEFunction{F})(un, u, p, t, dt) where {F} = f.f(un, u, p, t, dt) # Don't wrap a ForwardEulerODEFunction in an ODEFunction.
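To make the new fields concrete, here is a minimal sketch (not part of the diff) of how an increment function can now be packaged with its Jacobian information; it mirrors the `ark_analytic` test problem added in `test/problems.jl` below, where `Wfact` computes `Δt * γ * J - I` (see the "Negating Wfact" notes in `src/solvers/rosenbrock.jl`):

```julia
# Increment form of du/dt = λu: the wrapped function computes u⁺ .+= Δt * f(u).
λ = -0.2
increment!(u⁺, u, p, t, Δt) = u⁺ .+= (Δt * λ) .* u
Wfact!(W, u, p, Δtγ, t) = W .= Δtγ * λ - 1  # W = Δt * γ * J - I, with J = λ here
func = ForwardEulerODEFunction(increment!; jac_prototype = [0.0], Wfact = Wfact!)
```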
diff --git a/src/integrators.jl b/src/integrators.jl index 209a53a5..1f515a6b 100644 --- a/src/integrators.jl +++ b/src/integrators.jl @@ -29,7 +29,7 @@ function DiffEqBase.__init( args...; dt, # required stepstop=-1, - adjustfinal=false, + adjustfinal=true, callback=nothing, save_func=(u,t,integrator)->copy(u), saveat=nothing, diff --git a/src/solvers/imex_ark.jl b/src/solvers/imex_ark.jl index d0366cdd..558a07c6 100644 --- a/src/solvers/imex_ark.jl +++ b/src/solvers/imex_ark.jl @@ -15,7 +15,7 @@ The abscissae are often defined as c_χ[i] := ∑_{j=1}^s a_χ[i,j] for the expl and implicit methods to be "internally consistent", with c_exp[i] = c_imp[i] for the overall IMEX method to be "internally consistent", but this is not required. If the weights are defined as b_χ[j] := a_χ[s,j], then u_next = U[s]; i.e., the -method is FSAL (first same as last). +method is FSAL (first same as last), which means that it is "stiffly accurate". To simplify our notation, let a_χ[s+1,j] := b_χ[j] ∀ j ∈ 1:s, diff --git a/src/solvers/matrix_utilities.jl b/src/solvers/matrix_utilities.jl new file mode 100644 index 00000000..ddbac704 --- /dev/null +++ b/src/solvers/matrix_utilities.jl @@ -0,0 +1,33 @@ +lower_plus_diagonal(matrix::T) where {T} = + T(LinearAlgebra.LowerTriangular(matrix)) +diagonal(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) +lower(matrix) = lower_plus_diagonal(matrix) - diagonal(matrix) + +lower_triangular_inv(matrix::T) where {T} = + T(inv(LinearAlgebra.LowerTriangular(matrix))) + +to_enumerated_rows(x) = x +function to_enumerated_rows(matrix::AbstractMatrix) + rows = tuple(1:size(matrix, 1)...) + nonzero_indices = map(i -> findall(matrix[i, :] .!= 0), rows) + enumerated_rows = map( + i -> tuple(zip(nonzero_indices[i], matrix[i, nonzero_indices[i]])...), + rows, + ) + return enumerated_rows +end + +linear_combination_terms(enumerated_row, vectors) = + map(((j, val),) -> broadcasted(*, val, vectors[j]), enumerated_row) +function linear_combination(enumerated_row, vectors) + length(enumerated_row) == 0 && return nothing + terms = linear_combination_terms(enumerated_row, vectors) + length(enumerated_row) == 1 && return terms[1] + return broadcasted(+, terms...) +end + +@generated foreachval(f::F, ::Val{N}) where {F, N} = + quote + Base.@nexprs $N i -> f(Val(i)) + return nothing + end diff --git a/src/solvers/old_rosenbrock.jl b/src/solvers/old_rosenbrock.jl new file mode 100644 index 00000000..35d85b45 --- /dev/null +++ b/src/solvers/old_rosenbrock.jl @@ -0,0 +1,102 @@ +export SSPKnoth + +abstract type RosenbrockAlgorithm <: DistributedODEAlgorithm end + +struct RosenbrockTableau{N, RT, N²} + A::SMatrix{N, N, RT, N²} + C::SMatrix{N, N, RT, N²} + Γ::SMatrix{N, N, RT, N²} + m::SMatrix{N, 1, RT, N} +end + +struct RosenbrockCache{Nstages, RT, N², A} + tableau::RosenbrockTableau{Nstages, RT, N²} + U::A + fU::A + k::NTuple{Nstages, A} + W + linsolve! +end + +function cache( + prob::DiffEqBase.AbstractODEProblem, + alg::RosenbrockAlgorithm; kwargs...) + + tab = tableau(alg, eltype(prob.u0)) + Nstages = length(tab.m) + U = zero(prob.u0) + fU = zero(prob.u0) + k = ntuple(n -> similar(prob.u0), Nstages) + W = prob.f.jac_prototype + linsolve! = alg.linsolve(Val{:init}, W, prob.u0; kwargs...) + + return RosenbrockCache(tab, U, fU, k, W, linsolve!) +end + + +function step_u!(int, cache::RosenbrockCache{Nstages, RT}) where {Nstages, RT} + tab = cache.tableau + + f! = int.prob.f + Wfact_t! 
= int.prob.f.Wfact_t + u = int.u + p = int.p + t = int.t + dt = int.dt + W = cache.W + U = cache.U + fU = cache.fU + k = cache.k + linsolve! = cache.linsolve! + + # 1) compute jacobian factorization + γ = dt * tab.Γ[1,1] + Wfact_t!(W, u, p, γ, t) + for i in 1:Nstages + U .= u + for j = 1:i-1 + U .+= tab.A[i,j] .* k[j] + end + # TODO: there should be a time modification here (t + c * dt) + # if f does depend on time, would need to add tgrad term as well + f!(fU, U, p, t) + for j = 1:i-1 + fU .+= (tab.C[i,j] / dt) .* k[j] + end + linsolve!(k[i], W, fU) + end + for i = 1:Nstages + u .+= tab.m[i] .* k[i] + end +end + +struct SSPKnoth{L} <: RosenbrockAlgorithm + linsolve::L +end +SSPKnoth(;linsolve)=SSPKnoth(linsolve) + + +function tableau(::SSPKnoth, RT) + # ROS.transformed=true; + N = 3 + N² = N*N + α = @SMatrix RT[ + 0 0 0; + 1 0 0; + 1/4 1/4 0] + # ROS.d=ROS.alpha*ones(ROS.nStage,1); + b = @SMatrix RT[1/6 1/6 2/3] + Γ = @SMatrix RT[ + 1 0 0; + 0 1 0; + -3/4 -3/4 1] + A = α / Γ + C = -inv(Γ) + m = b / Γ + return RosenbrockTableau{N, RT, N²}(A, C, Γ, m) +# ROS.SSP.alpha=[1 0 0 +# 3/4 1/4 0 +# 1/3 0 2/3]; + +end diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl index 35d85b45..e3333a07 100644 --- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -1,102 +1,564 @@ -export SSPKnoth +#= +## Introduction to Rosenbrock Methods -abstract type RosenbrockAlgorithm <: DistributedODEAlgorithm end +An s-stage diagonally implicit Runge-Kutta (DIRK) method for solving +∂u/∂t = f(u, t) is specified by a lower triangular s×s matrix a (whose values +are called "internal coefficients"), a vector b of length s (whose values are +called "weights"), and a vector c of length s (whose values are called +"abscissae" or "nodes"). +Given the state u at time t, the state u_next at time t + Δt is given by + u_next := u + Δt * ∑_{i=1}^s bᵢ * f(Uᵢ, t + Δt * cᵢ), where + Uᵢ := u + Δt * ∑_{j=1}^i aᵢⱼ * f(Uⱼ, t + Δt * cⱼ). +In order to transform this DIRK method into a Rosenbrock method, we must assume +that it is "internally consistent", which means that + cᵢ := ∑_{j=1}^i aᵢⱼ. -struct RosenbrockTableau{N, RT, N²} - A::SMatrix{N, N, RT, N²} - C::SMatrix{N, N, RT, N²} - Γ::SMatrix{N, N, RT, N²} - m::SMatrix{N, 1, RT, N} +First, we will simplify our notation by defining + Tᵢ := t + Δt * ∑_{j=1}^i aᵢⱼ and + Fᵢ := f(Uᵢ, Tᵢ). +This simplifies our previous expressions to + u_next = u + Δt * ∑_{i=1}^s bᵢ * Fᵢ and + Uᵢ = u + Δt * ∑_{j=1}^i aᵢⱼ * Fⱼ. +Next, we will define + Ûᵢ := u + Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ and + T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ. +This implies that + Uᵢ = Ûᵢ + Δt * aᵢᵢ * Fᵢ and + Tᵢ = T̂ᵢ + Δt * aᵢᵢ. +Substituting this into the definition of Fᵢ gives us + Fᵢ = f(Ûᵢ + Δt * aᵢᵢ * Fᵢ, T̂ᵢ + Δt * aᵢᵢ). +Approximating the value of f with a first-order Taylor series expansion around +Ûᵢ and T̂ᵢ gives us + Fᵢ ≈ f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * aᵢᵢ * Fᵢ + Δt * ḟ(Ûᵢ, T̂ᵢ) * aᵢᵢ, where + J(u, t) := ∂f/∂u(u, t) and + ḟ(u, t) := ∂f/∂t(u, t). +This is roughly equivalent to running a single Newton iteration to solve for Fᵢ, +starting with an initial guess of f(Ûᵢ, T̂ᵢ) (or, equivalently, to solve for Uᵢ, +starting with an initial guess of Ûᵢ). + +In order to obtain a Rosenbrock method, we must modify this equation for Fᵢ. +By using only a single Newton iteration, we break the assumptions that provide +guarantees about the convergence of the DIRK method.
+To remedy this, we introduce an additional lower triangular s×s matrix of +coefficients γ, and we redefine Fᵢ to be the solution to + Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ * Fⱼ + + Δt * ḟ(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ. +In other words, we account for the higher-order terms dropped by the first-order +Taylor series expansion by taking carefully chosen linear combinations of Fⱼ. +Since this effectively replaces aᵢᵢ with entries in γ, the diagonal entries can +now be omitted from a, making it strictly lower triangular. + +For a "modified Rosenbrock method", the coefficients are chosen so that +J(Ûᵢ, T̂ᵢ) and ḟ(Ûᵢ, T̂ᵢ) can be approximated by their values at time t, J(u, t) +and ḟ(u, t). +For a "W-method", the approximation can be even coarser; e.g., the values at a +previous timestep, or the values at a precomputed reference state, or even +approximations that do not correspond to any particular state. +So, we will modify the last equation by replacing J(Ûᵢ, T̂ᵢ) and ḟ(Ûᵢ, T̂ᵢ) with +some approximations J and ḟ (whose values must satisfy the constraints of the +specific Rosenbrock method being used), giving us + Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ. +Solving this equation for Fᵢ lets us redefine + Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ). +Since multiplying ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ by J is more computationally expensive +than subtracting it from another value, we will rewrite this as + Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. + +To simplify our further reformulations, we will convert our definitions to +matrix form. +First, though, we will reduce the number of matrix equations we need to analyze +by defining + âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ and + Û⁺ᵢ := i < s ? Ûᵢ₊₁ : u_next. +We can then redefine + u_next := Û⁺ₛ and + Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁, where + Û⁺ᵢ := u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ. + +So, in summary, a Rosenbrock method is defined by some lower triangular s×s +matrix γ, some strictly lower triangular s×s matrix a, some vector b of length +s, and some approximations J and ḟ of ∂f/∂u(Ûᵢ, T̂ᵢ) and ∂f/∂t(Ûᵢ, T̂ᵢ), which +are all used to compute + u_next := Û⁺ₛ, where + âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ, + Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁, + T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ, + Û⁺ᵢ := u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ, and + Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. + +## Converting to Matrix Form + +The equations we will convert to matrix form are + Û⁺ᵢ = u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ and + Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ. +Rewriting these with an explicit element index n ∈ 1:N (where N = length(u)) +gives us + Û⁺ᵢ[n] = u[n] + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ[n] and + Fᵢ[n] = f(Ûᵢ, T̂ᵢ)[n] + Δt * ∑_{m=1}^N J[n,m] * ∑_{j=1}^i γᵢⱼ * Fⱼ[m] + + Δt * ḟ[n] * ∑_{j=1}^i γᵢⱼ. +Now, we will formally define the vectors and matrices + 𝟙 ∈ ℝˢ | 𝟙ᵢ := 1, + u ∈ ℝᴺ | uₙ := u[n], + ḟ ∈ ℝᴺ | ḟₙ := ḟ[n], and + Û⁺ ∈ ℝᴺ×ℝˢ | Û⁺ₙᵢ := Û⁺ᵢ[n], + F ∈ ℝᴺ×ℝˢ | Fₙᵢ := Fᵢ[n], + F̂ ∈ ℝᴺ×ℝˢ | F̂ₙᵢ := f(Ûᵢ, T̂ᵢ)[n], + J ∈ ℝᴺ×ℝᴺ | Jₙₘ := J[n,m]. +(If J and ḟ are different for each stage, they may be replaced with tensors in +all of the following manipulations.) +We then have that + Û⁺ₙᵢ = uₙ * 𝟙ᵢ + Δt * ∑_{j=1}^i Fₙⱼ * âᵢⱼ and + Fₙᵢ = F̂ₙᵢ + Δt * ∑_{m=1}^N Jₙₘ * ∑_{j=1}^i Fₘⱼ * γᵢⱼ + + Δt * ḟₙ * ∑_{j=1}^i 𝟙ⱼ * γᵢⱼ. 
+We can rewrite this as + Û⁺ₙᵢ = uₙ * (𝟙ᵀ)ᵢ + Δt * ∑_{j=1}^s Fₙⱼ * (âᵀ)ⱼᵢ and + Fₙᵢ = F̂ₙᵢ + Δt * ∑_{m=1}^N Jₙₘ * ∑_{j=1}^s Fₘⱼ * (γᵀ)ⱼᵢ + + Δt * ḟₙ * ∑_{j=1}^s (𝟙ᵀ)ⱼ * (γᵀ)ⱼᵢ. +Combining matrix-matrix and matrix-vector products gives us + Û⁺ₙᵢ = uₙ * (𝟙ᵀ)ᵢ + Δt * (F * âᵀ)ₙᵢ and + Fₙᵢ = F̂ₙᵢ + Δt * (J * F * γᵀ)ₙᵢ + Δt * ḟₙ * (γ * 𝟙)ᵢ. +Dropping the indices then tells us that + Û⁺ = u ⊗ 𝟙ᵀ + Δt * F * âᵀ and + F = F̂ + Δt * J * F * γᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +To rewrite this in a way that more directly corresponds to our last formulation, +we will define the functions diagonal() and lower(), such that, for any lower +triangular matrix M, + M = diagonal(M) + lower(M), where + diagonal(M)ᵢⱼ := i == j ? Mᵢⱼ : 0 and + lower(M)ᵢⱼ := i > j ? Mᵢⱼ : 0. +This lets us rewrite the equation for F as + F * (I + diagonal(γ)⁻¹ * lower(γ))ᵀ - + Δt * J * F * (I + diagonal(γ)⁻¹ * lower(γ))ᵀ * diagonal(γ)ᵀ = + = F̂ + F * (diagonal(γ)⁻¹ * lower(γ))ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. + +We will now use these matrix equations to define two reformulations: one that +optimizes performance by eliminating the subtraction after the linear solve, and +one that enables limiters by appropriately modifying the value being subtracted. + +## Optimizing Performance + +Let us define a new N×s matrix + K := Δt * F * γᵀ. +We then have that + F = Δt⁻¹ * K * (γ⁻¹)ᵀ. +This allows us to rewrite the matrix equations as + Û⁺ = u ⊗ 𝟙ᵀ + K * (â * γ⁻¹)ᵀ and + Δt⁻¹ * K * (γ⁻¹)ᵀ = F̂ + J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Since γ is lower triangular, + γ⁻¹ = diagonal(γ⁻¹) + lower(γ⁻¹) = diagonal(γ)⁻¹ + lower(γ⁻¹). +This lets us rewrite the equation for K as + Δt⁻¹ * K * (diagonal(γ)⁻¹ + lower(γ⁻¹))ᵀ = F̂ + J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Multiplying through on the right by Δt * diagonal(γ)ᵀ and rearranging gives us + K - Δt * J * K * diagonal(γ)ᵀ = + Δt * (F̂ - Δt⁻¹ * K * lower(γ⁻¹)ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ) * diagonal(γ)ᵀ. +Taking this and the equation for Û⁺ back out of matrix form gives us + Û⁺ᵢ = u + ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ and + Kᵢ - Δt * γᵢᵢ * J * Kᵢ = + = Δt * γᵢᵢ * + (f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ). +Thus, to optimize performance, we can redefine + Û⁺ᵢ := u + ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where + Kᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * Δt * γᵢᵢ * ( + f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ). + +## Enabling Limiters + +In the previous section, we changed the temporary variable from Fᵢ, which is an +approximation of f(Uᵢ, Tᵢ), to Kᵢ, which is a linear combination of previously +computed values of Fᵢ. +Now, we will change the temporary variable to Vᵢ, so that the approximations of +Uᵢ and u_next will be linear combinations of previously computed values of Vᵢ. +In other words, we will make it so that Û⁺ is a linear transformation of the +temporary variable, rather than an affine transformation, absorbing u into the +temporary variable. +So, consider a lower triangular s×s matrix β and an N×s matrix V such that + Û⁺ = V * βᵀ. +We can then rewrite the matrix equations as + V * βᵀ = u ⊗ 𝟙ᵀ + Δt * F * âᵀ and + F = F̂ + Δt * J * F * γᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Solving the first equation for F tells us that + F = Δt⁻¹ * V * (â⁻¹ * β)ᵀ - Δt⁻¹ * u ⊗ (â⁻¹ * 𝟙)ᵀ. +Substituting this into the second equation gives us + Δt⁻¹ * V * (â⁻¹ * β)ᵀ - Δt⁻¹ * u ⊗ (â⁻¹ * 𝟙)ᵀ = + = F̂ + J * V * (γ * â⁻¹ * β)ᵀ - J * u ⊗ (γ * â⁻¹ * 𝟙)ᵀ + + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. 
+Multiplying through on the right by Δt * (β⁻¹ * â * γ⁻¹)ᵀ and rearranging +results in + V * (β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ - Δt * J * V = + = u ⊗ (β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ + + Δt * F̂ * (β⁻¹ * â * γ⁻¹)ᵀ + Δt² * ḟ ⊗ (β⁻¹ * â * 𝟙)ᵀ. +Since γ, â, and β are all lower triangular, β⁻¹ * â * γ⁻¹ * â⁻¹ * β is also +lower triangular, which means that + β⁻¹ * â * γ⁻¹ * â⁻¹ * β = + diagonal(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). +In addition, we have that + diagonal(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) = + = diagonal(β⁻¹) * diagonal(â) * diagonal(γ⁻¹) * diagonal(â⁻¹) * + diagonal(β) = + = diagonal(β)⁻¹ * diagonal(â) * diagonal(γ)⁻¹ * diagonal(â)⁻¹ * + diagonal(β) = + = diagonal(γ)⁻¹. +Combining the last two expressions gives us + β⁻¹ * â * γ⁻¹ * â⁻¹ * β = diagonal(γ)⁻¹ + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). +Substituting this into the last equation for V gives us + V * (diagonal(γ)⁻¹)ᵀ + V * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ - Δt * J * V = + = u ⊗ (β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ + + Δt * F̂ * (β⁻¹ * â * γ⁻¹)ᵀ + Δt² * ḟ ⊗ (β⁻¹ * â * 𝟙)ᵀ. +Multiplying through on the right by diagonal(γ)ᵀ and rearranging tells us that + V - Δt * J * V * diagonal(γ)ᵀ = + = u ⊗ (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - + Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diagonal(γ)ᵀ + + Δt * F̂ * (diagonal(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diagonal(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * (diagonal(γ) * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β))ᵀ. +Since lower() preserves multiplication by a diagonal matrix, we can rewrite +this as + V - Δt * J * V * diagonal(γ)ᵀ = + = u ⊗ (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - + Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diagonal(γ)ᵀ + + Δt * F̂ * (diagonal(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diagonal(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * lower(diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. + +We will now show that this reformulation will not allow us to eliminate +multiplications by J, as the previous ones did. +If we wanted to factor out all multiplications by J when we convert back out of +matrix form, we would rearrange the last equation to get + (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) - Δt * J * (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) * diagonal(γ)ᵀ = + = u ⊗ ((diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙)ᵀ + + Δt * F̂ * (diagonal(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diagonal(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * lower(diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. +In order to apply limiters on an unscaled state, we would then require that the +coefficient of u on the right-hand side of this equation be 𝟙ᵀ; i.e., that + (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙 = 𝟙. +In general, this equation does not have a solution β; e.g., if γ = â = d * I for +some scalar constant d, then the equation simplifies to + (β⁻¹ - β⁻¹) * 𝟙 = 𝟙. +Even if we were to use more complex transformations, it would still be +impossible to eliminate multiplications by J while preserving an unscaled state +on the right-hand side. +For example, if we were to instead set Û⁺ = u ⊗ δᵀ + V * βᵀ for some vector δ of +length s, the above equation would become + (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * (δ - 𝟙) = 𝟙. +This also does not have a general solution. + +So, we must proceed without rearranging the last equation for V. +In order to apply limiters on an unscaled state, we must require that the +coefficient of u in that equation be 𝟙ᵀ, which implies that + diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. + +We will now show that we cannot also make the coefficient of the J term on the +right-hand side be the same as the one on the left-hand side. 
+If we wanted this to be the case, we would also have to satisfy the equation + β⁻¹ * 𝟙 = 𝟙. +In general, we cannot simultaneously satisfy both of the last two equations; +e.g., if diagonal(γ) = d * I for some scalar constant d, we can rearrange the equations to +get that + β * 𝟙 = d * â * γ⁻¹ * â⁻¹ * 𝟙 and β * 𝟙 = 𝟙. +Unless d * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙 (which will not be the case in general), this +system of equations cannot be satisfied. + +So, we will only require that β satisfies the equation + diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. +This equation has infinitely many solutions; the easiest way to obtain a +solution is to set + diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ = I. +This implies that + β⁻¹ = diagonal(γ)⁻¹ * â * γ * â⁻¹ and + β = â * γ⁻¹ * â⁻¹ * diagonal(γ). +Substituting this into the last equation for V gives us + V - Δt * J * V * diagonal(γ)ᵀ = + = u ⊗ 𝟙ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diagonal(γ)ᵀ + Δt * F̂ * âᵀ + + Δt² * ḟ ⊗ (â * γ * 𝟙)ᵀ - V * lower(β)ᵀ. +Taking this and the equation Û⁺ = V * βᵀ back out of matrix form gives us + Û⁺ᵢ = ∑_{j=1}^i βᵢⱼ * Vⱼ and + Vᵢ - Δt * γᵢᵢ * J * Vᵢ = + = u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + Δt * ∑_{j=1}^i âᵢⱼ * f(Ûⱼ, T̂ⱼ) + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ. +Thus, to enable the use of limiters, we can redefine + Û⁺ᵢ := ∑_{j=1}^i βᵢⱼ * Vⱼ, where + Vᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + Δt * ∑_{j=1}^i âᵢⱼ * f(Ûⱼ, T̂ⱼ) + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ + ). + +To actually use a limiter, we must replace the use of f(u, t) in the equation +for Vᵢ with a new function g(u₊, u, t, Δt), where + g(u₊, u, t, Δt) := u₊ + Δt * f(u, t). +Internally, g can use a different tendency function f̃(u, t), and it can apply a +limiter L after incrementing a state with the output of f̃, so that + g(u₊, u, t, Δt) = L(u₊ + Δt * f̃(u, t)). +Rewriting the last definition of Vᵢ in terms of g instead of f gives us + Vᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * g( + u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + Ûᵢ, + T̂ᵢ + Δt * âᵢᵢ, + ). + +## Negating Wfact + +For some reason, OrdinaryDiffEq defines Wfact as Δt * γᵢᵢ * J - I, so we must +negate all of our temporary variables. + +For the original formulation, this means that + Ûᵢ := u - Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ, where + Fᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. +For the performance formulation, this means that + Û⁺ᵢ := u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where + Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( + f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ). +For the limiters formulation, this means that + Û⁺ᵢ := -∑_{j=1}^i βᵢⱼ * Vⱼ, where + Vᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * g( + u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + Ûᵢ, + T̂ᵢ + Δt * âᵢᵢ, + ). +=# +export RosenbrockAlgorithm + +import LinearAlgebra +import StaticArrays: SUnitRange, SOneTo +import Base: broadcasted, materialize!
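The defining property of β can be checked numerically. Here is a minimal sketch (illustrative only, using the SSPKnoth coefficients from `src/solvers/rosenbrock_tableaus.jl` below) that verifies `diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙`:

```julia
using LinearAlgebra

γ = [1 0 0; 0 1 0; -3/4 -3/4 1]  # SSPKnoth γ
a = [0 0 0; 1 0 0; 1/4 1/4 0]    # SSPKnoth a
b = [1/6, 1/6, 2/3]              # SSPKnoth b

â = vcat(a[2:end, :], b')                    # âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ
β = â * inv(γ) * inv(â) * Diagonal(diag(γ))  # β := â * γ⁻¹ * â⁻¹ * diagonal(γ)

# The unscaled-state requirement from the derivation above:
@assert Diagonal(diag(γ)) * inv(β) * â * inv(γ) * inv(â) * ones(3) ≈ ones(3)
```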
+ +struct RosenbrockAlgorithm{γ, a, b, U, L, M, S} <: DistributedODEAlgorithm + update_jac::U + linsolve::L + multiply!::M + set_Δtγ!::S +end +function RosenbrockAlgorithm{γ, a, b}(; + update_jac::U = UpdateEvery(NewStep()), + linsolve::L, + multiply!::M = nothing, + set_Δtγ!::S = nothing, +) where {γ, a, b, U, L, M, S} + check_valid_parameters(RosenbrockAlgorithm{γ, a, b, U}) + return RosenbrockAlgorithm{γ, a, b, U, L, M, S}( + update_jac, + linsolve, + multiply!, + set_Δtγ!, + ) end -struct RosenbrockCache{Nstages, RT, N², A} - tableau::RosenbrockTableau{Nstages, RT, N²} - U::A - fU::A - k::NTuple{Nstages, A} - W - linsolve! +num_stages(::Type{<:RosenbrockAlgorithm{γ}}) where {γ} = size(γ, 1) + +function check_valid_parameters( + ::Type{<:RosenbrockAlgorithm{γ, a, b, U}}, +) where {γ, a, b, U} + γ === lower_plus_diagonal(γ) || + error("γ must be a lower triangular matrix") + a === lower(a) || + error("a must be a strictly lower triangular matrix") + LinearAlgebra.det(γ) != 0 || + error("non-invertible matrices γ are not currently supported") + if U != UpdateEvery{NewStage} + diagonal(γ) === typeof(γ)(γ[1, 1] * I) || + error("γ must have a uniform diagonal when \ + update_jac != UpdateEvery(NewStage())") + end + can_handle(U, NewStep()) || can_handle(U, NewStage()) || + error("update_jac must be able to handle NewStep() or NewStage()") +end +function check_valid_parameters( + ::Type{<:RosenbrockAlgorithm{γ, a, b, U, L, M, S}}, + ::Type{<:ForwardEulerODEFunction}, +) where {γ, a, b, U, L, M, S} + â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) + LinearAlgebra.det(â) != 0 || + error("non-invertible matrices â are not currently supported when \ + using ForwardEulerODEFunction") + M != Nothing || + error("multiply! must be specified when using ForwardEulerODEFunction") + S != Nothing || + error("set_Δtγ! must be specified when using ForwardEulerODEFunction") end +check_valid_parameters(_, _) = true +struct RosenbrockCache{C, U, L} + _cache::C + update_jac_cache::U + linsolve!::L +end + +# TODO: Minimize allocations by reusing temporary variables after they are no +# longer needed. function cache( prob::DiffEqBase.AbstractODEProblem, - alg::RosenbrockAlgorithm; kwargs...) + alg::RosenbrockAlgorithm; + kwargs... +) + check_valid_parameters(typeof(alg), typeof(prob.f)) + + s = num_stages(typeof(alg)) + u_prototype = prob.u0 + W_prototype = prob.f.jac_prototype + increment_mode = prob.f isa ForwardEulerODEFunction - tab = tableau(alg, eltype(prob.u0)) - Nstages = length(tab.m) - U = zero(prob.u0) - fU = zero(prob.u0) - k = ntuple(n -> similar(prob.u0), Nstages) - W = prob.f.jac_prototype - linsolve! = alg.linsolve(Val{:init}, W, prob.u0; kwargs...) + _cache = NamedTuple(( + :Û⁺ᵢ => similar(u_prototype), + (increment_mode ? (:Fs => map(i -> similar(u_prototype), 1:s),) : ())..., + (increment_mode ? :Vs : :Ks) => map(i -> similar(u_prototype), 1:s), + :W => similar(W_prototype), + :ḟ => similar(u_prototype), + )) - return RosenbrockCache(tab, U, fU, k, W, linsolve!) + update_jac_cache = allocate_cache(alg.update_jac) + + linsolve! = alg.linsolve(Val{:init}, W_prototype, u_prototype) + + return RosenbrockCache(_cache, update_jac_cache, linsolve!) end +step_u!(integrator, cache::RosenbrockCache) = + rosenbrock_step_u!(integrator, cache, integrator.prob.f) + +# The precomputed values are too complicated for constant propagation, so we use +# @generated to force the values to be compile-time constants. 
+@generated function precomputed_values( + ::Type{<:RosenbrockAlgorithm{γ, a, b}}, + _, +) where {γ, a, b} + â = vcat(a[2:end, :], transpose(b)) + γ⁻¹ = lower_triangular_inv(γ) + γ𝟙 = vec(sum(γ, dims = 2)) + a𝟙 = vec(sum(a, dims = 2)) + matrices = + map(to_enumerated_rows, (; lowerγ⁻¹ = lower(γ⁻¹), âγ⁻¹ = â * γ⁻¹)) + values = (; matrices..., diagγ = diag(γ), γ𝟙, a𝟙) + return :($values) +end +@generated function precomputed_values( + ::Type{<:RosenbrockAlgorithm{γ, a, b}}, + ::Type{<:ForwardEulerODEFunction}, +) where {γ, a, b} + â = vcat(a[2:end, :], transpose(b)) + âγ = â * γ + β = â * lower_triangular_inv(âγ) * diagonal(γ) + a𝟙 = vec(sum(a, dims = 2)) + âγ𝟙 = vec(sum(âγ, dims = 2)) + β⁻¹𝟙 = vec(sum(lower_triangular_inv(β), dims = 2)) + matrices = + map(to_enumerated_rows, (; lowerâ = lower(â), lowerβ = lower(β), β)) + values = (; matrices..., diagγ = diag(γ), diagâ = diag(â), a𝟙, âγ𝟙, β⁻¹𝟙) + return :($values) +end -function step_u!(int, cache::RosenbrockCache{Nstages, RT}) where {Nstages, RT} - tab = cache.tableau - - f! = int.prob.f - Wfact_t! = int.prob.f.Wfact_t - - u = int.u - p = int.p - t = int.t - dt = int.dt - W = cache.W - U = cache.U - fU = cache.fU - k = cache.k - linsolve! = cache.linsolve! - - # 1) compute jacobian factorization - γ = dt * tab.Γ[1,1] - Wfact_t!(W, u, p, γ, t) - for i in 1:Nstages - U .= u - for j = 1:i-1 - U .+= tab.A[i,j] .* k[j] - end - # TODO: there should be a time modification here (t + c * dt) - # if f does depend on time, would need to add tgrad term as well - f!(fU, U, p, t) - for j = 1:i-1 - fU .+= (tab.C[i,j] / dt) .* k[j] - end - linsolve!(k[i], W, fU) +function rosenbrock_step_u!(integrator, cache, f) + (; u, p, t, dt, alg) = integrator + (; update_jac) = alg + (; update_jac_cache, linsolve!) = cache + (; Û⁺ᵢ, Ks, W, ḟ) = cache._cache + (; lowerγ⁻¹, âγ⁻¹, diagγ, γ𝟙, a𝟙) = + precomputed_values(typeof(alg), typeof(f)) + function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + f.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) + !isnothing(f.tgrad) && f.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) end - for i = 1:Nstages - u .+= tab.m[i] .* k[i] + function stage_func(::Val{i}) where {i} + Δtγᵢᵢ = dt * diagγ[i] + + Ûᵢ = i == 1 ? u : Û⁺ᵢ # Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁ + T̂ᵢ = i == 1 ? t : t + dt * a𝟙[i] # T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ + + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + + # Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( + # f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + # ) + f(Ks[i], Ûᵢ, p, T̂ᵢ) + lγ⁻¹ᵢⱼKⱼ = linear_combination(lowerγ⁻¹[i], Ks) + Ks[i] .= Δtγᵢᵢ .* broadcasted( + +, + Ks[i], + (isnothing(lγ⁻¹ᵢⱼKⱼ) ? () : (broadcasted(/, lγ⁻¹ᵢⱼKⱼ, dt),))..., + (isnothing(f.tgrad) ? () : (broadcasted(*, dt * γ𝟙[i], ḟ),))..., + ) + linsolve!(Ks[i], W, Ks[i]) # assume that linsolve! can handle aliasing + + # Û⁺ᵢ := u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ + âγ⁻¹ᵢⱼKⱼ = linear_combination(âγ⁻¹[i], Ks) + Û⁺ᵢ .= isnothing(âγ⁻¹ᵢⱼKⱼ) ? 
u : broadcasted(-, u, âγ⁻¹ᵢⱼKⱼ) end -end -struct SSPKnoth{L} <: RosenbrockAlgorithm - linsolve::L + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) + foreachval(stage_func, Val(num_stages(typeof(alg)))) + u .= Û⁺ᵢ # u_next := Û⁺ₛ end -SSPKnoth(;linsolve)=SSPKnoth(linsolve) - - -function tableau(::SSPKnoth, RT) - # ROS.transformed=true; - N = 3 - N² = N*N - α = @SMatrix RT[ - 0 0 0; - 1 0 0; - 1/4 1/4 0] - # ROS.d=ROS.alpha*ones(ROS.nStage,1); - b = @SMatrix RT[1/6 1/6 2/3] - Γ = @SMatrix RT[ - 1 0 0; - 0 1 0; - -3/4 -3/4 1] - A = α / Γ - C = -inv(Γ) - m = b / Γ - return RosenbrockTableau{N, RT, N²}(A, C, Γ, m) -# ROS.SSP.alpha=[1 0 0 -# 3/4 1/4 0 -# 1/3 0 2/3]; +function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) + (; u, p, t, dt, alg) = integrator + (; update_jac, multiply!, set_Δtγ!) = alg + (; update_jac_cache, linsolve!) = cache + (; Û⁺ᵢ, Vs, Fs, W, ḟ) = cache._cache + (; lowerâ, lowerβ, β, diagγ, diagâ, a𝟙, âγ𝟙, β⁻¹𝟙) = + precomputed_values(typeof(alg), typeof(g)) + function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + g.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) + !isnothing(g.tgrad) && g.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) + end + function stage_func(::Val{i}) where {i} + Δtγᵢᵢ = dt * diagγ[i] + Δtâᵢᵢ = dt * diagâ[i] + + Ûᵢ = i == 1 ? u : Û⁺ᵢ # Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁ + T̂ᵢ = i == 1 ? t : t + dt * a𝟙[i] # T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ + + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + + # Vᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * g( + # u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + # Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + # Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + # Ûᵢ, + # T̂ᵢ + Δt * âᵢᵢ, + # ) + set_Δtγ!(W, Δtγᵢᵢ * β⁻¹𝟙[i], Δtγᵢᵢ) + multiply!(Vs[i], W, u) + set_Δtγ!(W, Δtγᵢᵢ, Δtγᵢᵢ * β⁻¹𝟙[i]) + lâᵢⱼFⱼ = linear_combination(lowerâ[i], Fs) + Vs[i] .= broadcasted( + +, + broadcasted(-, Vs[i]), + (isnothing(lâᵢⱼFⱼ) ? () : (broadcasted(*, dt, lâᵢⱼFⱼ),))..., + (isnothing(g.tgrad) ? () : (broadcasted(*, dt^2 * âγ𝟙[i], ḟ),))..., + linear_combination_terms(lowerβ[i], Vs)..., + ) + Fs[i] .= Vs[i] + g(Vs[i], Ûᵢ, p, T̂ᵢ, Δtâᵢᵢ) + Fs[i] .= (Vs[i] .- Fs[i]) ./ Δtâᵢᵢ + linsolve!(Vs[i], W, Vs[i]) # assume that linsolve! can handle aliasing + + # Û⁺ᵢ := -∑_{j=1}^i βᵢⱼ * Vⱼ + Û⁺ᵢ .= broadcasted(-, linear_combination(β[i], Vs)) + end + + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) + foreachval(stage_func, Val(num_stages(typeof(alg)))) + u .= Û⁺ᵢ # u_next := Û⁺ₛ end diff --git a/src/solvers/rosenbrock_tableaus.jl b/src/solvers/rosenbrock_tableaus.jl new file mode 100644 index 00000000..33d3336a --- /dev/null +++ b/src/solvers/rosenbrock_tableaus.jl @@ -0,0 +1,186 @@ +export Rosenbrock23, SSPKnoth, RODASP2 +export ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3 + +using StaticArrays: @SArray + +#= +Rosenbrock23 in OrdinaryDiffEq.jl and ode23s in MATLAB + +Rosenbrock-W method with 2 stages and 2nd order convergence + +From Section 4.1 of "The MATLAB ODE Suite" by L. F. Shampine and M. W.
Reichelt + +The paper does not directly provide the method coefficients, but, converting to +our notation, it specifies that + Û₁ = u, T̂₁ = t, + F₁ = (I - Δt * 1/(2+√2) * J)⁻¹ * (f(Û₁, T̂₁) + Δt * 1/(2+√2) * ḟ), + Û₂ = u + Δt * 1/2 * F₁, T̂₂ = t + Δt * 1/2, + F₂ = (I - Δt * 1/(2+√2) * J)⁻¹ * (f(Û₂, T̂₂) - F₁ + Δt * 0 * ḟ) + F₁, and + u_next = u + Δt * (0 * F₁ + 1 * F₂). +This implies the following table of coefficients: +=# +const Rosenbrock23 = RosenbrockAlgorithm{ + @SArray([ + 1/(2+√2) 0; + -1/(2+√2) 1/(2+√2); + ]), + @SArray([ + 0 0; + 1/2 0; + ]), + @SArray([0, 1]), +} + +#= +Rosenbrock-W method with 3 stages and 2nd order convergence from Oswald Knoth +=# +const SSPKnoth = RosenbrockAlgorithm{ + @SArray([ + 1 0 0; + 0 1 0; + -3/4 -3/4 1; + ]), + @SArray([ + 0 0 0; + 1 0 0; + 1/4 1/4 0; + ]), + @SArray([1/6, 1/6, 2/3]), +} + +#= +An improved version of RODASP, which is itself an improved version of RODAS + +Rosenbrock-W method with 6 stages and 4th order convergence (reduces to 2nd +order for inexact Jacobians) + +From Table 3 of "Improvement of Rosenbrock-Wanner Method RODASP" by G. +Steinebach + +The paper refers to our a as "α", and it specifies β = α + γ instead of γ. +=# +const RODASP2 = begin + α = @SArray([ + 0 0 0 0 0 0; + 3/4 0 0 0 0 0; + 3.688749816109670e-1 -4.742684759792117e-2 0 0 0 0; + 4.596170083041160e-1 2.724432453018110e-1 -2.123145213282008e-1 0 0 0; + 2.719770298548111e+0 1.358873794835473e+0 -2.838824065018641e+0 -2.398200283649438e-1 0 0; + -6.315720511779362e-1 -3.326966988718489e-1 1.154688683864918e+0 5.595800661848674e-1 1/4 0; + ]) + β = @SArray([ + 1/4 0 0 0 0 0; + 0 1/4 0 0 0 0; + -9.184372116108780e-2 -2.624106318888223e-2 1/4 0 0 0; + -5.817702768270960e-2 -1.382129630513952e-1 5.517478318046004e-1 1/4 0 0; + -6.315720511779359e-1 -3.326966988718489e-1 1.154688683864917e+0 5.595800661848674e-1 1/4 0; + 1.464968119068509e-1 8.896159691002870e-2 1.648843942975147e-1 4.568000540284631e-1 -1.071428571428573e-1 1/4; + ]) + RosenbrockAlgorithm{ + β - α, + α, + vec(β[end, :]), + } +end + +################################################################################ + +#= +Methods from "New Rosenbrock W-Methods of Order 3 for Partial Differential +Algebraic Equations of Index 1" by J. Rang and L. Angermann + +Each method is named ROS3[4][P][w/W][...], where "ROS3" indicates a Rosenbrock +method of order 3, "4" indicates that the method has 4 stages (otherwise, it has +3 stages), "P" indicates that the method is suited for parabolic problems, "w" +or "W" indicates that the method can handle an inexact Jacobian, and there is an +identifying string of numbers and symbols at the ends of some names. + +ROS3w and ROS3Pw both reduce to order 2 for inexact Jacobians. +ROS34PW3 is actually of order 4 but reduces to order 3 for inexact Jacobians.
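+For example, ROS34PW2 is a 4-stage Rosenbrock W-method of order 3 that is +suited for parabolic problems.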
+=# +const ROS3w = RosenbrockAlgorithm{ + @SArray([ + 4.358665215084590e-1 0 0; + 3.635068368900681e-1 4.358665215084590e-1 0; + −8.996866791992636e-1 −1.537997822626885e-1 4.358665215084590e-1; + ]), + @SArray([ + 0 0 0; + 2/3 0 0; + 2/3 0 0; + ]), + @SArray([1/4, 1/4, 1/2]), +} +const ROS3Pw = RosenbrockAlgorithm{ + @SArray([ + 7.8867513459481287e-1 0 0; + −1.5773502691896257e+0 7.8867513459481287e-1 0; + −6.7075317547305480e-1 −1.7075317547305482e-1 7.8867513459481287e-1; + ]), + @SArray([ + 0 0 0; + 1.5773502691896257e+0 0 0; + 1/2 0 0; + ]), + @SArray([1.0566243270259355e-1, 4.9038105676657971e-2, 8.4529946162074843e-1]), +} +const ROS34PW1a = RosenbrockAlgorithm{ + @SArray([ + 4.358665215084590e-1 0 0 0; + −2.218787467653286e+0 4.358665215084590e-1 0 0; + −9.461966143940745e-2 −7.913526735718213e-3 4.358665215084590e-1 0; + −1.870323744195384e+0 −9.624340112825115e-2 2.726301276675511e-1 4.358665215084590e-1; + ]), + @SArray([ + 0 0 0 0; + 2.218787467653286e+0 0 0 0; + 0 0 0 0; + 1.208587690772214e+0 7.511610241919324e-2 1/2 0; + ]), + @SArray([3.285609536316354e-1, −5.785609536316354e-1, 1/4, 1]), +} +const ROS34PW1b = RosenbrockAlgorithm{ + @SArray([ + 4.358665215084590e-1 0 0 0; + −2.218787467653286e+0 4.358665215084590e-1 0 0; + −2.848610224639349e+0 −5.267530183845237e-2 4.358665215084590e-1 0; + −1.128167857898393e+0 −1.677546870499461e-1 5.452602553351021e-2 4.358665215084590e-1; + ]), + @SArray([ + 0 0 0 0; + 2.218787467653286e+0 0 0 0; + 2.218787467653286e+0 0 0 0; + 1.453923375357884e+0 0 1/10 0; + ]), + @SArray([5.495647928937977e-1, −5.507258170857301e-1, 1/4, 7.511610241919324e-1]), +} +const ROS34PW2 = RosenbrockAlgorithm{ + @SArray([ + 4.3586652150845900e-1 0 0 0; + −8.7173304301691801e-1 4.3586652150845900e-1 0 0; + −9.0338057013044082e-1 5.4180672388095326e-2 4.3586652150845900e-1 0; + 2.4212380706095346e-1 −1.2232505839045147e+0 5.4526025533510214e-1 4.3586652150845900e-1; + ]), + @SArray([ + 0 0 0 0; + 8.7173304301691801e-1 0 0 0; + 8.4457060015369423e-1 −1.1299064236484185e-1 0 0; + 0 0 1 0; + ]), + @SArray([2.4212380706095346e-1, −1.2232505839045147e+0, 1.5452602553351020e+0, 4.3586652150845900e-1]), +} +const ROS34PW3 = RosenbrockAlgorithm{ + @SArray([ + 1.0685790213016289e+0 0 0 0; + −2.5155456020628817e+0 1.0685790213016289e+0 0 0; + −8.7991339217106512e-1 −9.6014187766190695e-1 1.0685790213016289e+0 0; + −4.1731389379448741e-1 4.1091047035857703e-1 −1.3558873204765276e+0 1.0685790213016289e+0; + ]), + @SArray([ + 0 0 0 0; + 2.5155456020628817e+0 0 0 0; + 5.0777280103144085e-1 3/4 0 0; + 1.3959081404277204e-1 −3.3111001065419338e-1 8.2040559712714178e-1 0; + ]), + @SArray([2.2047681286931747e-1, 2.7828278331185935e-3, 7.1844787635140066e-3, 7.6955588053404989e-1]), +} diff --git a/src/solvers/update_signal_handler.jl b/src/solvers/update_signal_handler.jl new file mode 100644 index 00000000..e22ba36e --- /dev/null +++ b/src/solvers/update_signal_handler.jl @@ -0,0 +1,46 @@ +export UpdateSignal, NewStep, NewStage, NewNewtonIteration +export UpdateSignalHandler, UpdateEvery, UpdateEveryN, can_handle + +abstract type UpdateSignal end +struct NewStep <: UpdateSignal end +struct NewStage <: UpdateSignal end +struct NewNewtonIteration <: UpdateSignal end + +abstract type UpdateSignalHandler end +struct UpdateEvery{U <: UpdateSignal} <: UpdateSignalHandler + update_signal::U +end +struct UpdateEveryN{U <: UpdateSignal, R <: Union{Nothing, UpdateSignal}} <: + UpdateSignalHandler + update_signal::U + n::Int + reset_n_signal::R +end +UpdateEveryN(update_signal, n, 
reset_n_signal = nothing) = + UpdateEveryN(update_signal, n, reset_n_signal) + +can_handle(::Type{<:UpdateSignalHandler}, ::UpdateSignal) = false +can_handle(::Type{UpdateEvery{U}}, ::U) where {U <: UpdateSignal} = true +can_handle(::Type{UpdateEveryN{U}}, ::U) where {U <: UpdateSignal} = true + +allocate_cache(::UpdateSignalHandler) = (;) +allocate_cache(::UpdateEveryN) = (; n = Ref(0)) + +run!(alg::UpdateSignalHandler, cache, ::UpdateSignal, f!, args...) = false +function run!(alg::UpdateEvery{U}, cache, ::U, f!, args...) where {U <: UpdateSignal} + f!(args...) + return true +end +function run!(alg::UpdateEveryN{U}, cache, ::U, f!, args...) where {U <: UpdateSignal} + cache.n[] += 1 + if cache.n[] == alg.n + f!(args...) + cache.n[] = 0 + return true + end + return false +end +function run!(alg::UpdateEveryN{U, R}, cache, ::R, f!, args...) where {U, R <: UpdateSignal} + cache.n[] = 0 + return false +end diff --git a/test/Project.toml b/test/Project.toml index 638e2fa2..ffd67bd2 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,12 +1,19 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] OrdinaryDiffEq = "5.64.0" diff --git a/test/comparison.jl b/test/comparison.jl new file mode 100644 index 00000000..8c4a1333 --- /dev/null +++ b/test/comparison.jl @@ -0,0 +1,32 @@ +using BenchmarkTools +import OrdinaryDiffEq # use import to avoid namespace conflicts + +@testset "Compare with OrdinaryDiffEq" begin + (; t_end, probs, analytic_sol) = ark_analytic_sys + FT = typeof(t_end) + tendency_prob = probs[1] + dt = t_end / 2^7 + + cts_alg = Rosenbrock23(; linsolve = linsolve_direct) + ode_alg = OrdinaryDiffEq.Rosenbrock23(; linsolve = linsolve_direct) + + cts_tendency_end_sol = solve(deepcopy(tendency_prob), cts_alg; dt).u[end] + ode_tendency_end_sol = + solve(deepcopy(tendency_prob), ode_alg; dt, adaptive = false).u[end] + @test norm(cts_tendency_end_sol .- ode_tendency_end_sol) < eps(FT) + + @info "Benchmark Results for ClimaTimeSteppers.Rosenbrock23:" + cts_trial = @benchmark solve($(deepcopy(tendency_prob)), $cts_alg, dt = $dt) + display(cts_trial) + + @info "Benchmark Results for OrdinaryDiffEq.Rosenbrock23:" + ode_trial = @benchmark solve( + $(deepcopy(tendency_prob)), + $ode_alg, + dt = $dt, + adaptive = false, + ) + display(ode_trial) + + @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.1 +end diff --git a/test/convergence.jl b/test/convergence.jl index 3c114ece..56c7c463 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -1,67 +1,69 @@ -using DiffEqBase, ClimaTimeSteppers, LinearAlgebra, Test +using ClimaTimeSteppers, LinearAlgebra, Printf, Plots, Test include("utils.jl") -dts = 0.5 .^ (4:7) +dt_fracs = 0.5 .^ (4:7) -for (prob,
sol) in [ + (linear_prob, linear_sol) + (sincos_prob, sincos_sol) ] + dts = dt_fracs .* prob.tspan[end] - @test convergence_order(prob, sol, LSRKEulerMethod(), dts.*tscale) ≈ 1 atol=0.1 - @test convergence_order(prob, sol, LSRK54CarpenterKennedy(), dts.*tscale) ≈ 4 atol=0.05 - @test convergence_order(prob, sol, LSRK144NiegemannDiehlBusch(), dts.*tscale) ≈ 4 atol=0.05 - - @test convergence_order(prob, sol, SSPRK22Heuns(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK22Ralstons(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts.*tscale) ≈ 3 atol=0.05 - @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts.*tscale) ≈ 3 atol=0.05 -end - -for (prob, sol, tscale) in [ - (linear_prob_wfactt, linear_sol, 1) -] - @test convergence_order(prob, sol, SSPKnoth(linsolve=linsolve_direct), dts.*tscale) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, LSRKEulerMethod(), dts) ≈ 1 atol=0.1 + @test convergence_order(prob, sol, LSRK54CarpenterKennedy(), dts) ≈ 4 atol=0.05 + @test convergence_order(prob, sol, LSRK144NiegemannDiehlBusch(), dts) ≈ 4 atol=0.05 + @test convergence_order(prob, sol, SSPRK22Heuns(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK22Ralstons(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts) ≈ 3 atol=0.05 + @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts) ≈ 3 atol=0.05 end # ForwardEulerODEFunction -for (prob, sol, tscale) in [ - (linear_prob_fe, linear_sol, 1) - (sincos_prob_fe, sincos_sol, 1) +for (prob, sol) in [ + (linear_prob_fe, linear_sol) + (sincos_prob_fe, sincos_sol) ] - @test convergence_order(prob, sol, SSPRK22Heuns(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK22Ralstons(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts.*tscale) ≈ 3 atol=0.05 - @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts.*tscale) ≈ 3 atol=0.05 + dts = dt_fracs .* prob.tspan[end] + + @test convergence_order(prob, sol, SSPRK22Heuns(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK22Ralstons(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts) ≈ 3 atol=0.05 + @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts) ≈ 3 atol=0.05 end -@testset "IMEX ARK Algorithms" begin +@testset "IMEX ARK Methods" begin algs1 = (ARS111, ARS121) algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) algs2 = (algs2..., IMKG252a, IMKG252b, IMKG253a, IMKG253b, IMKG254a) algs2 = (algs2..., IMKG254b, IMKG254c, HOMMEM1) algs3 = (ARS233, ARS343, ARS443, IMKG342a, IMKG343a, DBM453) - for (algorithm_names, order) in ((algs1, 1), (algs2, 2), (algs3, 3)) - for algorithm_name in algorithm_names - for (problem, solution) in ( - (split_linear_prob_wfact_split, linear_sol), - (split_linear_prob_wfact_split_fe, linear_sol), - ) - algorithm = algorithm_name( - NewtonsMethod(; linsolve = linsolve_direct, max_iters = 1), - ) # the problem is linear, so more iters have no effect - @test isapprox( - convergence_order(problem, solution, algorithm, dts), - order; - rtol = 0.01, - ) - end - end - end + dict = Dict(((algs1 .=> 1)..., (algs2 .=> 2)..., (algs3 .=> 3)...)) + test_algs("IMEX ARK", dict, ark_analytic_sys, 6) + test_algs("IMEX ARK", dict, ark_analytic_nonlin, 9) + # For some bizarre reason, ARS121 converges with order 2 for ark_analytic, + # even though it is only a 1st order method. 
+ dict′ = copy(dict) + dict′[ARS121] = 2 + test_algs("IMEX ARK", dict′, ark_analytic, 14) +end + +@testset "Rosenbrock Methods" begin + algs2 = (Rosenbrock23, SSPKnoth) + algs3 = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) + algs4 = (RODASP2, ROS34PW3) + dict = Dict((algs2 .=> 2)..., (algs3 .=> 3)..., (algs4 .=> 4)...) + args = (; no_increment_algs = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b)) + test_algs("Rosenbrock", dict, ark_analytic_sys, 6; args...) + test_algs("Rosenbrock", dict, ark_analytic_nonlin, 9; args...) + # For ark_analytic, there is no range where all the methods pass the test. + test_algs("Rosenbrock Order 2", Dict(algs2 .=> 2), ark_analytic, 14) + dict34 = Dict((algs3 .=> 3)..., ROS34PW3 => 4) + test_algs("Rosenbrock Order 3&4", dict34, ark_analytic, 12; args...) + test_algs("RODASP2", Dict(RODASP2 => 4), ark_analytic, 8) end #= diff --git a/test/problems.jl b/test/problems.jl index b314789a..c4a8ea83 100644 --- a/test/problems.jl +++ b/test/problems.jl @@ -36,7 +36,7 @@ linear_prob = IncrementingODEProblem{true}( linear_prob_fe = ODEProblem( ForwardEulerODEFunction((un,u,p,t,dt) -> (un .= u .+ dt .* p .* u)), [1.0],(0.0,1.0),-0.2) - + linear_prob_wfactt = ODEProblem( ODEFunction( (du,u,p,t,α=true,β=false) -> (du .= α .* p .* u .+ β .* du); @@ -117,10 +117,11 @@ imex_autonomous_prob = SplitODEProblem( ArrayType([0.5]), (0.0,1.0), 4.0) function linsolve_direct(::Type{Val{:init}}, f, u0; kwargs...) - function _linsolve!(x, A, b, update_matrix = false; kwargs...) - x .= A \ b - end + _linsolve!(x, A, b, update_matrix = false; kwargs...) = x .= A \ b end +multiply_direct!(b, A, x) = mul!(b, A, x) +set_Δtγ_direct!(A, Δtγ_new, Δtγ_old) = + A .= (A .+ I(size(A, 1))) .* (Δtγ_new / Δtγ_old) .- I(size(A, 1)) imex_autonomous_prob_jac = ODEProblem( ODEFunction( @@ -218,3 +219,125 @@ kpr_singlerate_prob = IncrementingODEProblem{true}( (dQ,Q,param,t,α=1,β=0) -> (dQ .= α .* kpr_rhs(Q,param,t) .+ β .* dQ), [sqrt(4), sqrt(3)], (0.0, 5π/2), kpr_param, ) + +struct IntegratorTestCase{FT, P, S, A} + test_name::String + linear_implicit::Bool + t_end::FT + probs::P + split_probs::S + analytic_sol::A +end + +# From Section 1.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +ark_analytic = let + FT = Float64 + λ = FT(-100) # increase magnitude for more stiffness + Y₀ = FT[0] + t_end = FT(10) + + source(t) = 1 / (1 + t^2) - λ * atan(t) + tendency!(Yₜ, Y, _, t) = Yₜ .= λ .* Y .+ source(t) + increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= Δt .* (λ .* Y .+ source(t)) + implicit_tendency!(Yₜ, Y, _, t) = Yₜ .= λ .* Y + explicit_tendency!(Yₜ, Y, _, t) = Yₜ .= source(t) + implicit_increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= (Δt * λ) .* Y + explicit_increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= Δt * source(t) + + Wfact!(W, Y, _, Δt, t) = W .= Δt * λ - 1 + tgrad!(∂Y∂t, Y, _, t) = ∂Y∂t .= -(λ + 2 * t + λ * t^2) / (1 + t^2)^2 + analytic_sol(t) = [atan(t)] + + func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) + tendency_func = ODEFunction(tendency!; func_args...) + increment_func = ForwardEulerODEFunction(increment!; func_args...) 
+ split_tendency_func = SplitFunction( + ODEFunction(implicit_tendency!; func_args...), + explicit_tendency!, + ) + split_increment_func = SplitFunction( + ForwardEulerODEFunction(implicit_increment!; func_args...), + ForwardEulerODEFunction(explicit_increment!), + ) + + make_prob(func) = ODEProblem(func, Y₀, (FT(0), t_end), nothing) + IntegratorTestCase( + "ark_analytic", + true, + t_end, + (make_prob(tendency_func), make_prob(increment_func)), + (make_prob(split_tendency_func), make_prob(split_increment_func)), + analytic_sol, + ) +end + +# From Section 1.2 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +ark_analytic_nonlin = let + FT = Float64 + Y₀ = FT[0] + t_end = FT(10) + + tendency!(Yₜ, Y, _, t) = Yₜ .= (t + 1) .* exp.(.-Y) + increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= Δt .* ((t + 1) .* exp.(.-Y)) + no_tendency!(Yₜ, Y, _, t) = Yₜ .= zero(FT) + no_increment!(Y⁺, Y, _, t, Δt) = Y⁺ + + Wfact!(W, Y, _, Δt, t) = W .= (-Δt * (t + 1) .* exp.(.-Y) .- 1) + tgrad!(∂Y∂t, Y, _, t) = ∂Y∂t .= exp.(.-Y) + analytic_sol(t) = [log(t^2 / 2 + t + 1)] + + func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) + tendency_func = ODEFunction(tendency!; func_args...) + split_tendency_func = SplitFunction(tendency_func, no_tendency!) + increment_func = ForwardEulerODEFunction(increment!; func_args...) + split_increment_func = + SplitFunction(increment_func, ForwardEulerODEFunction(no_increment!)) + + make_prob(func) = ODEProblem(func, Y₀, (FT(0), t_end), nothing) + IntegratorTestCase( + "ark_analytic_nonlin", + false, + t_end, + (make_prob(tendency_func), make_prob(increment_func)), + (make_prob(split_tendency_func), make_prob(split_increment_func)), + analytic_sol, + ) +end + +# From Section 5.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +ark_analytic_sys = let + FT = Float64 + λ = FT(-100) # increase magnitude for more stiffness + V = FT[1 -1 1; -1 2 1; 0 -1 2] + V⁻¹ = FT[5 1 -3; 2 2 -2; 1 1 1] / 4 + D = Diagonal(FT[-1/2, -1/10, λ]) + A = V * D * V⁻¹ + I = LinearAlgebra.I(3) + Y₀ = FT[1, 1, 1] + t_end = FT(1 / 20) + + tendency!(Yₜ, Y, _, t) = mul!(Yₜ, A, Y) + increment!(Y⁺, Y, _, t, Δt) = mul!(Y⁺, A, Y, Δt, 1) + no_tendency!(Yₜ, Y, _, t) = Yₜ .= zero(FT) + no_increment!(Y⁺, Y, _, t, Δt) = Y⁺ + + Wfact!(W, Y, _, Δt, t) = W .= Δt .* A .- I + analytic_sol(t) = V * exp(D * t) * V⁻¹ * Y₀ + + func_args = (; jac_prototype = A, Wfact = Wfact!) + tendency_func = ODEFunction(tendency!; func_args...) + split_tendency_func = SplitFunction(tendency_func, no_tendency!) + increment_func = ForwardEulerODEFunction(increment!; func_args...) + split_increment_func = + SplitFunction(increment_func, ForwardEulerODEFunction(no_increment!)) + + make_prob(func) = ODEProblem(func, Y₀, (FT(0), t_end), nothing) + IntegratorTestCase( + "ark_analytic_sys", + true, + t_end, + (make_prob(tendency_func), make_prob(increment_func)), + (make_prob(split_tendency_func), make_prob(split_increment_func)), + analytic_sol, + ) +end diff --git a/test/runtests.jl b/test/runtests.jl index 5cc2e10f..3a471f58 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,7 @@ include("problems.jl") include("integrator.jl") include("convergence.jl") +include("comparison.jl") include("callbacks.jl") include("test_convergence_checker.jl") include("single_column_ARS_test.jl") diff --git a/test/utils.jl b/test/utils.jl index 3a5b43b6..4c96519c 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -48,3 +48,120 @@ function (::DirectSolver)(x,A,b,matrix_updated; kwargs...) 
M = mapslices(y -> mul!(similar(y), A, y), Matrix{eltype(x)}(I,n,n), dims=1) x .= M \ b end + +function test_algs( + algs_name, + algs_to_order, + test_case, + num_dt_splits; + num_saveat_splits = min(num_dt_splits, 8), + no_increment_algs = (), +) + (; test_name, linear_implicit, t_end, probs, split_probs, analytic_sol) = + test_case + FT = typeof(t_end) + linestyles = (:solid, :dash, :dot, :dashdot, :dashdotdot) + plot_kwargs = (; + size = (1000, 600), + margin = 3Plots.mm, + titlelocation = :left, + legend_position = :outerright, + palette = :glasbey_bw_minc_20_maxl_70_n256, + ) + + plot1_dt = t_end / 2^num_dt_splits + plot1_saveat = [FT(0), t_end / 2^num_saveat_splits] + # Ensure that the saveat times are an EXACT subset of the integrator times. + while plot1_saveat[end] < t_end + push!(plot1_saveat, min(plot1_saveat[end] + plot1_saveat[2], t_end)) + end + plot1a = plot(; + title = "Solution Norms of $algs_name Methods for `$test_name` \ + (with dt = 10^$(@sprintf "%.1f" log10(plot1_dt)))", + xlabel = "t", + ylabel = "Solution Norm: ||Y_computed||", + plot_kwargs..., + ) + plot1b = plot(; + title = "Solution Errors of $algs_name Methods for `$test_name` \ + (with dt = 10^$(@sprintf "%.1f" log10(plot1_dt)))", + xlabel = "t", + ylabel = "Error Norm: ||Y_computed - Y_analytic||", + yscale = :log10, + plot_kwargs..., + ) + plot1b_ymin = typemax(FT) # dynamically set ylim because some errors are 0 + plot1b_ymax = typemin(FT) + + t_end_string = t_end % 1 == 0 ? string(Int(t_end)) : @sprintf("%.2f", t_end) + plot2_dts = [plot1_dt / 4, plot1_dt, plot1_dt * 4] + plot2 = plot(; + title = "Convergence Orders of $algs_name Methods for `$test_name` \ + (at t = $t_end_string)", + xlabel = "dt", + ylabel = "Error Norm: ||Y_computed - Y_analytic||", + xscale = :log10, + yscale = :log10, + plot_kwargs..., + ) + + analytic_sols = map(analytic_sol, plot1_saveat) + analytic_end_sol = [analytic_sols[end]] + sorted_algs_to_order = sort(collect(algs_to_order); by = x -> string(x[1])) + for (alg_name, predicted_order) in sorted_algs_to_order + if alg_name <: IMEXARKAlgorithm + max_iters = linear_implicit ? 1 : 2 + alg = + alg_name(NewtonsMethod(; linsolve = linsolve_direct, max_iters)) + (tendency_prob, increment_prob) = split_probs + elseif alg_name <: RosenbrockAlgorithm + alg = alg_name(; + linsolve = linsolve_direct, + multiply! = multiply_direct!, + set_Δtγ! = set_Δtγ_direct!, + ) + (tendency_prob, increment_prob) = probs + end + linestyle = linestyles[(predicted_order - 1) % length(linestyles) + 1] + + solve_args = (; dt = plot1_dt, saveat = plot1_saveat) + tendency_sols = + solve(deepcopy(tendency_prob), alg; solve_args...).u + tendency_norms = @. norm(tendency_sols) + tendency_errors = @. norm(tendency_sols - analytic_sols) + min_error = minimum(x -> x == 0 ? typemax(FT) : x, tendency_errors) + plot1b_ymin = min(plot1b_ymin, min_error) + plot1b_ymax = max(plot1b_ymax, maximum(tendency_errors)) + tendency_errors .= + max.(tendency_errors, eps(FT(0))) # plotting 0 breaks the log scale + label = alg_name + plot!(plot1a, plot1_saveat, tendency_norms; label, linestyle) + plot!(plot1b, plot1_saveat, tendency_errors; label, linestyle) + if !(alg_name in no_increment_algs) + increment_sols = + solve(deepcopy(increment_prob), alg; solve_args...).u + increment_errors = @. norm(increment_sols - tendency_sols) + @test maximum(increment_errors) < 10000 * eps(FT) broken = + alg_name == HOMMEM1 # TODO: why is this broken??? 
+ end + + tendency_end_sols = map( + dt -> solve(deepcopy(tendency_prob), alg; dt).u[end], + plot2_dts, + ) + tendency_end_errors = @. norm(tendency_end_sols - analytic_end_sol) + _, computed_order = hcat(ones(length(plot2_dts)), log10.(plot2_dts)) \ + log10.(tendency_end_errors) + @test computed_order ≈ predicted_order rtol = 0.1 + label = "$alg_name ($(@sprintf "%.3f" computed_order))" + plot!(plot2, plot2_dts, tendency_end_errors; label, linestyle) + end + plot!(plot1b; ylim = (plot1b_ymin / 2, plot1b_ymax * 2)) + + mkpath("output") + file_suffix = "$(test_name)_$(lowercase(replace(algs_name, " " => "_")))" + savefig(plot1a, joinpath("output", "solutions_$(file_suffix).png")) + savefig(plot1b, joinpath("output", "errors_$(file_suffix).png")) + savefig(plot2, joinpath("output", "orders_$(file_suffix).png")) +end +
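For reference, here is a minimal end-to-end sketch of how the pieces above fit together; it assumes the definitions from `test/problems.jl` and `test/utils.jl` (`ark_analytic_sys`, `linsolve_direct`, `multiply_direct!`, and `set_Δtγ_direct!`):

```julia
using ClimaTimeSteppers, LinearAlgebra

(; t_end, probs) = ark_analytic_sys
tendency_prob, increment_prob = probs

# Tendency (ODEFunction) form: only a linear solver is required.
alg = Rosenbrock23(; linsolve = linsolve_direct)
sol = solve(deepcopy(tendency_prob), alg; dt = t_end / 2^7)

# Increment (ForwardEulerODEFunction) form: multiply! and set_Δtγ! must also
# be provided, since the limiters formulation manipulates W directly.
alg′ = Rosenbrock23(;
    linsolve = linsolve_direct,
    multiply! = multiply_direct!,
    set_Δtγ! = set_Δtγ_direct!,
)
sol′ = solve(deepcopy(increment_prob), alg′; dt = t_end / 2^7)
```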