diff --git a/discussion.md b/discussion.md deleted file mode 100644 index ae9dd31..0000000 --- a/discussion.md +++ /dev/null @@ -1,8 +0,0 @@ -# Discussion - -## 2024/01/26 - -#### Yuwen -**A)** Try [CUDSS.jl](https://github.com/exanauts/CUDSS.jl) -**B)** Try the Factorized Sparse Approximate Inverse (**FSAI**) (parallel) preconditioner -**C)** Rough idea: Update the LDL factorization just in even iteration, use the initial factorized values as a warm-starting point to optimize (regarded as an inner optimization problem, similar idea to the parallel preconditioner). \ No newline at end of file diff --git a/src/Clarabel.jl b/src/Clarabel.jl index c4aee0f..ac72cf5 100644 --- a/src/Clarabel.jl +++ b/src/Clarabel.jl @@ -6,6 +6,7 @@ module Clarabel using CUDA, CUDA.CUBLAS # for GPU implementation const DefaultFloat = Float64 const DefaultInt = LinearAlgebra.BlasInt + const GPUsocSize = 5 # maximal size of second-order cones in GPU implementation # Rust-like Option type const Option{T} = Union{Nothing,T} @@ -93,6 +94,7 @@ module Clarabel include("./gpucones/coneops_powcone_gpu.jl") include("./gpucones/coneops_compositecone_gpu.jl") include("./gpucones/coneops_nonsymmetric_common_gpu.jl") + include("./gpucones/augment_socp.jl") #various algebraic utilities include("./utils/mathutils.jl") diff --git a/src/cones/cone_types.jl b/src/cones/cone_types.jl index d217804..06ef570 100644 --- a/src/cones/cone_types.jl +++ b/src/cones/cone_types.jl @@ -98,14 +98,12 @@ mutable struct SecondOrderCone{T} <: AbstractCone{T} function SecondOrderCone{T}(dim::Integer) where {T} - SOC_NO_EXPANSION_MAX_SIZE = 4 - dim >= 2 || throw(DomainError(dim, "dimension must be >= 2")) w = zeros(T,dim) λ = zeros(T,dim) η = zero(T) - if dim > SOC_NO_EXPANSION_MAX_SIZE + if dim > GPUsocSize sparse_data = SecondOrderConeSparseData{T}(dim) else sparse_data = nothing diff --git a/src/gpucones/augment_socp.jl b/src/gpucones/augment_socp.jl new file mode 100644 index 0000000..32a09ad --- /dev/null +++ b/src/gpucones/augment_socp.jl @@ -0,0 +1,145 @@ + +function count_soc(cone, size_soc) + + numel_cone = Clarabel.nvars(cone) + @assert(numel_cone > size_soc) + + num_socs = 1 + numel_cone -= size_soc-1 + + while (numel_cone > size_soc-1) + numel_cone -= size_soc-2 + num_socs += 1 + end + + num_socs += 1 + + return num_socs, numel_cone+1 +end + +#augment At,b for a large soc +function augment_data(At0, + b0::Vector{T}, + rng_row,size_soc,num_soc,last_size,augx_idx) where {T} + + At = At0[:,rng_row] + b = b0[rng_row] + (n,m) = size(At) + reduce_soc = size_soc - 2 + @assert(reduce_soc > 0) + + bnew = sizehint!(T[],m + 2*(num_soc-1)) + conenew = sizehint!(Clarabel.SupportedCone[],num_soc) + + Atnew = At[:,1] + push!(bnew,b[1]) + idx = 1 #complete index + for i in 1:num_soc + if i == num_soc + rng = idx+1:idx+last_size-1 + Atnew = hcat(Atnew,At[:,rng]) + @views append!(bnew,b[rng]) + push!(conenew, Clarabel.SecondOrderConeT(last_size)) + else + rng = idx+1:idx+reduce_soc + Atnew = hcat(Atnew,At[:,rng]) + @views append!(bnew,b[rng]) + push!(conenew, Clarabel.SecondOrderConeT(size_soc)) + + idx += reduce_soc + augx_idx += 1 + Atnew = hcat(Atnew,sparse([augx_idx, augx_idx],[1,2],[-1,-1],n,2)) + @views append!(bnew,[0,0]) + end + end + + return Atnew, bnew, conenew, augx_idx +end + +function augment_A_b(cones, + P, + q::Vector{T}, + A, + b, + size_soc, + num_socs, last_sizes, soc_indices, soc_starts) where {T} + + (m,n) = size(A) + + extra_dim = sum(num_socs) - length(num_socs) #Additional dimensionality for x + + At = vcat(SparseMatrixCSC(A'), spzeros(extra_dim,m)) #May be costly, but more efficient to add rows to a SparseCSR matrix + bnew = sizehint!(T[],m + 2*extra_dim) + conesnew = sizehint!(Clarabel.SupportedCone[],length(cones) + extra_dim) + + Atnew = spzeros(n+extra_dim,0) + + start_idx = 0 + end_idx = 0 + cone_idx = 0 + augx_idx = n #the pointer to the auxiliary x used so far + + for (i,ind) in enumerate(soc_indices) + + @views append!(conesnew, cones[cone_idx+1:ind-1]) + + numel_cone = Clarabel.nvars(cones[ind]) + + end_idx = soc_starts[i] + + rng = start_idx+1:end_idx + @views Atnew = hcat(Atnew, At[:,rng]) + @views append!(bnew,b[rng]) + + start_idx = end_idx + end_idx += numel_cone + rng_cone = start_idx+1:end_idx + + Ati,bi,conesi,augx_idx = augment_data(At, b, rng_cone,size_soc,num_socs[i],last_sizes[i],augx_idx) #augment the current large soc + + Atnew = hcat(Atnew, Ati) + append!(bnew,bi) + append!(conesnew,conesi) + + start_idx = end_idx + cone_idx = ind + end + + if (cone_idx< length(cones)) + @views Atnew = hcat(Atnew, At[:,start_idx+1:end]) + @views append!(bnew,b[start_idx+1:end]) + @views append!(conesnew, cones[cone_idx+1:end]) + end + + Pnew = hcat(vcat(P,spzeros(extra_dim,n)),spzeros(n+extra_dim,extra_dim)) + return Pnew,vcat(q,zeros(extra_dim)),SparseMatrixCSC(Atnew'), bnew, conesnew +end + +function expand_soc(cones,size_soc) + n_large_soc = 0 + soc_indices = sizehint!(Int[],length(cones)) #Indices of large second-order cones + soc_starts = sizehint!(Int[],length(cones)) #Starting index of each large second-order cone + num_socs = sizehint!(Int[],length(cones)) + last_sizes = sizehint!(Int[],length(cones)) #Size of the last expanded second-order cones + + cones_dim = 0 + for (i,cone) in enumerate(cones) + numel_cone = Clarabel.nvars(cone) + if isa(cone,Clarabel.SecondOrderConeT) && numel_cone > size_soc + append!(soc_indices, i) + append!(soc_starts, cones_dim) + + num_soc, last_size = count_soc(cone,size_soc) + append!(num_socs, num_soc) + append!(last_sizes, last_size) + n_large_soc += 1 + end + + cones_dim += numel_cone + end + + resize!(num_socs,n_large_soc) + resize!(last_sizes,n_large_soc) + + return num_socs, last_sizes, soc_indices, soc_starts +end diff --git a/src/kktsolvers/kktsolver_directldl_gpu.jl b/src/kktsolvers/kktsolver_directldl_gpu.jl index b612f8a..88d29b2 100644 --- a/src/kktsolvers/kktsolver_directldl_gpu.jl +++ b/src/kktsolvers/kktsolver_directldl_gpu.jl @@ -9,7 +9,7 @@ const CuVectorView{T} = SubArray{T, 1, AbstractVector{T}, Tuple{AbstractVector{I mutable struct GPULDLKKTSolver{T} <: AbstractKKTSolver{T} # problem dimensions - m::Int; n::Int; p::Int + m::Int; n::Int # Left and right hand sides for solves x::AbstractVector{T} @@ -58,16 +58,13 @@ mutable struct GPULDLKKTSolver{T} <: AbstractKKTSolver{T} #YC: update GPU map, should be removed later on mapgpu = GPUDataMap(P,A,cones,mapcpu) - #YC: disabled sparse expansion - #Need this many extra variables for sparse cones + #YC: disabled sparse expansion and preprocess a large second-order cone into multiple small cones p = pdim(mapcpu.sparse_maps) - if p > 0 - error("We don't support the augmented sparse cone at the moment!") - end + @assert(iszero(p)) #updates to the diagonal of KKT will be #assigned here before updating matrix entries - dim = m + n + p + dim = m + n #LHS/RHS/work for iterative refinement x = CuVector{T}(undef,dim) @@ -87,7 +84,7 @@ mutable struct GPULDLKKTSolver{T} <: AbstractKKTSolver{T} #the indirect linear solver engine GPUsolver = GPUsolverT{T}(KKTgpu,x,b) - return new(m,n,p,x,b, + return new(m,n,x,b, work1,work2,mapcpu,mapgpu, Dsigns, Hsblocks, @@ -147,22 +144,6 @@ function _update_diag_values_KKT!( end - -# #scale entries in the kktsolver object using the -# #given index into its CSC representation -# function _scale_values!( -# GPUsolver::AbstractGPUSolver{T}, -# KKT::SparseMatrixCSC{T,Ti}, -# index::AbstractVector{Ti}, -# scale::T -# ) where{T,Ti} - -# #Update values in the KKT matrix K -# @. KKT.nzval[index] *= scale - -# end - - function kktsolver_update!( kktsolver:: GPULDLKKTSolver{T}, cones::CompositeConeGPU{T} @@ -196,20 +177,6 @@ function _kktsolver_update_inner!( @. kktsolver.Hsblocks *= -one(T) _update_values!(GPUsolver,KKT,map.Hsblocks,kktsolver.Hsblocks) - #YC: disabled sparse expansion - # sparse_map_iter = Iterators.Stateful(map.sparse_maps) - - # updateFcn = (index,values) -> _update_values!(GPUsolver,KKTcpu,index,values) - # scaleFcn = (index,scale) -> _scale_values!(GPUsolver,KKTcpu,index,scale) - - # for cone in cones - # #add sparse expansions columns for sparse cones - # if @conedispatch is_sparse_expandable(cone) - # thismap = popfirst!(sparse_map_iter) - # _csc_update_sparsecone_full(cone,thismap,updateFcn,scaleFcn) - # end - # end - return _kktsolver_regularize_and_refactor!(kktsolver, GPUsolver) end @@ -286,11 +253,10 @@ function kktsolver_setrhs!( ) where {T} b = kktsolver.b - (m,n,p) = (kktsolver.m,kktsolver.n,kktsolver.p) + (m,n) = (kktsolver.m,kktsolver.n) b[1:n] .= rhsx b[(n+1):(n+m)] .= rhsz - b[(n+m+1):(n+m+p)] .= 0 CUDA.synchronize() diff --git a/src/problemdata.jl b/src/problemdata.jl index fc6981e..e9eaa98 100644 --- a/src/problemdata.jl +++ b/src/problemdata.jl @@ -29,6 +29,15 @@ function DefaultProblemData{T}( (A, b, cones) = (A_new, b_new, cones_new) end + # Preprocess large second-order cones + if settings.direct_solve_method === :cudss + size_soc = GPUsocSize + num_socs, last_sizes, soc_indices, soc_starts = expand_soc(cones,size_soc) + P_new,q_new,A_new,b_new,cones_new = augment_A_b(cones,P,q,A,b,size_soc,num_socs, last_sizes, soc_indices, soc_starts) + + (P, q, A, b, cones) = (P_new, q_new, A_new, b_new, cones_new) + end + # chordal decomposition : return nothing if disabled or no decomp # -------------------------------------- chordal_info = try_chordal_info(A,b,cones,settings) diff --git a/src/solution.jl b/src/solution.jl index db5c7f1..686a9f3 100644 --- a/src/solution.jl +++ b/src/solution.jl @@ -36,6 +36,10 @@ function solution_post_process!( if !isnothing(data.presolver) reverse_presolve!(data.presolver, solution, variables) + elseif settings.direct_solve_method === :cudss && (length(solution.x) != length(variables.x)) + lenx = length(solution.x) + @. solution.x = variables.x[1:lenx] #extra entries are slack variables for large second-order cones + println("Solve an augmented problem that only returns value x") else @. solution.x = variables.x @. solution.z = variables.z