diff --git a/discussion.md b/discussion.md
deleted file mode 100644
index ae9dd31..0000000
--- a/discussion.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Discussion
-
-## 2024/01/26
-
-#### Yuwen
-**A)** Try [CUDSS.jl](https://github.com/exanauts/CUDSS.jl) 
-**B)** Try the Factorized Sparse Approximate Inverse (**FSAI**) (parallel) preconditioner 
-**C)** Rough idea: Update the LDL factorization just in even iteration, use the initial factorized values as a warm-starting point to optimize (regarded as an inner optimization problem, similar idea to the parallel preconditioner). 
\ No newline at end of file
diff --git a/src/Clarabel.jl b/src/Clarabel.jl
index c4aee0f..ac72cf5 100644
--- a/src/Clarabel.jl
+++ b/src/Clarabel.jl
@@ -6,6 +6,7 @@ module Clarabel
     using CUDA, CUDA.CUBLAS # for GPU implementation
     const DefaultFloat = Float64
     const DefaultInt   = LinearAlgebra.BlasInt
+    const GPUsocSize   = 5  # maximal size of second-order cones in GPU implementation
 
     # Rust-like Option type
     const Option{T} = Union{Nothing,T} 
@@ -93,6 +94,7 @@ module Clarabel
     include("./gpucones/coneops_powcone_gpu.jl")
     include("./gpucones/coneops_compositecone_gpu.jl")
     include("./gpucones/coneops_nonsymmetric_common_gpu.jl")
+    include("./gpucones/augment_socp.jl")
 
     #various algebraic utilities
     include("./utils/mathutils.jl")
diff --git a/src/cones/cone_types.jl b/src/cones/cone_types.jl
index d217804..06ef570 100644
--- a/src/cones/cone_types.jl
+++ b/src/cones/cone_types.jl
@@ -98,14 +98,12 @@ mutable struct SecondOrderCone{T} <: AbstractCone{T}
 
     function SecondOrderCone{T}(dim::Integer) where {T}
 
-        SOC_NO_EXPANSION_MAX_SIZE = 4
-
         dim >= 2 || throw(DomainError(dim, "dimension must be >= 2"))
         w = zeros(T,dim)
         λ = zeros(T,dim)
         η = zero(T)
 
-        if dim > SOC_NO_EXPANSION_MAX_SIZE
+        if dim > GPUsocSize
             sparse_data = SecondOrderConeSparseData{T}(dim)
         else
             sparse_data = nothing
diff --git a/src/gpucones/augment_socp.jl b/src/gpucones/augment_socp.jl
new file mode 100644
index 0000000..32a09ad
--- /dev/null
+++ b/src/gpucones/augment_socp.jl
@@ -0,0 +1,145 @@
+
+function count_soc(cone, size_soc)
+
+    numel_cone = Clarabel.nvars(cone)
+    @assert(numel_cone > size_soc)
+        
+    num_socs = 1
+    numel_cone -= size_soc-1
+
+    while (numel_cone > size_soc-1)
+        numel_cone -= size_soc-2
+        num_socs += 1
+    end
+
+    num_socs += 1
+
+    return num_socs, numel_cone+1
+end
+
+#augment At,b for a large soc
+function augment_data(At0,
+    b0::Vector{T},
+    rng_row,size_soc,num_soc,last_size,augx_idx) where {T}
+
+    At = At0[:,rng_row]
+    b = b0[rng_row]
+    (n,m) = size(At)
+    reduce_soc = size_soc - 2
+    @assert(reduce_soc > 0)
+
+    bnew = sizehint!(T[],m + 2*(num_soc-1))
+    conenew = sizehint!(Clarabel.SupportedCone[],num_soc)
+
+    Atnew = At[:,1]
+    push!(bnew,b[1])
+    idx = 1             #complete index
+    for i in 1:num_soc
+        if i == num_soc
+            rng = idx+1:idx+last_size-1
+            Atnew = hcat(Atnew,At[:,rng])
+            @views append!(bnew,b[rng])
+            push!(conenew, Clarabel.SecondOrderConeT(last_size))
+        else
+            rng = idx+1:idx+reduce_soc
+            Atnew = hcat(Atnew,At[:,rng])
+            @views append!(bnew,b[rng])
+            push!(conenew, Clarabel.SecondOrderConeT(size_soc))
+
+            idx += reduce_soc
+            augx_idx += 1
+            Atnew = hcat(Atnew,sparse([augx_idx, augx_idx],[1,2],[-1,-1],n,2))
+            @views append!(bnew,[0,0])
+        end
+    end
+
+    return Atnew, bnew, conenew, augx_idx
+end
+
+function augment_A_b(cones,
+    P,
+    q::Vector{T},
+    A,
+    b,
+    size_soc,
+    num_socs, last_sizes, soc_indices, soc_starts) where {T}
+
+    (m,n) = size(A)
+    
+    extra_dim = sum(num_socs) - length(num_socs)    #Additional dimensionality for x
+
+    At = vcat(SparseMatrixCSC(A'), spzeros(extra_dim,m))        #May be costly, but more efficient to add rows to a SparseCSR matrix
+    bnew = sizehint!(T[],m + 2*extra_dim)
+    conesnew = sizehint!(Clarabel.SupportedCone[],length(cones) + extra_dim)
+
+    Atnew = spzeros(n+extra_dim,0)
+
+    start_idx = 0
+    end_idx = 0
+    cone_idx = 0
+    augx_idx = n    #the pointer to the auxiliary x used so far
+    
+    for (i,ind) in enumerate(soc_indices)
+
+        @views append!(conesnew, cones[cone_idx+1:ind-1])
+
+        numel_cone = Clarabel.nvars(cones[ind])
+
+        end_idx = soc_starts[i]
+
+        rng = start_idx+1:end_idx
+        @views Atnew = hcat(Atnew, At[:,rng])
+        @views append!(bnew,b[rng])
+
+        start_idx = end_idx
+        end_idx += numel_cone
+        rng_cone = start_idx+1:end_idx
+
+        Ati,bi,conesi,augx_idx = augment_data(At, b, rng_cone,size_soc,num_socs[i],last_sizes[i],augx_idx)        #augment the current large soc
+
+        Atnew = hcat(Atnew, Ati)
+        append!(bnew,bi)
+        append!(conesnew,conesi)
+
+        start_idx = end_idx
+        cone_idx = ind
+    end
+    
+    if (cone_idx< length(cones))
+        @views Atnew = hcat(Atnew, At[:,start_idx+1:end])
+        @views append!(bnew,b[start_idx+1:end])
+        @views append!(conesnew, cones[cone_idx+1:end])
+    end
+
+    Pnew = hcat(vcat(P,spzeros(extra_dim,n)),spzeros(n+extra_dim,extra_dim))
+    return Pnew,vcat(q,zeros(extra_dim)),SparseMatrixCSC(Atnew'), bnew, conesnew
+end
+
+function expand_soc(cones,size_soc)
+    n_large_soc = 0
+    soc_indices = sizehint!(Int[],length(cones))            #Indices of large second-order cones 
+    soc_starts = sizehint!(Int[],length(cones))          #Starting index of each large second-order cone 
+    num_socs = sizehint!(Int[],length(cones))
+    last_sizes = sizehint!(Int[],length(cones))      #Size of the last expanded second-order cones 
+
+    cones_dim = 0
+    for (i,cone) in enumerate(cones)
+        numel_cone = Clarabel.nvars(cone)
+        if isa(cone,Clarabel.SecondOrderConeT) && numel_cone > size_soc
+            append!(soc_indices, i)
+            append!(soc_starts, cones_dim)
+
+            num_soc, last_size = count_soc(cone,size_soc)
+            append!(num_socs, num_soc)
+            append!(last_sizes, last_size)
+            n_large_soc += 1
+        end
+
+        cones_dim += numel_cone
+    end
+
+    resize!(num_socs,n_large_soc)
+    resize!(last_sizes,n_large_soc)
+
+    return num_socs, last_sizes, soc_indices, soc_starts
+end
diff --git a/src/kktsolvers/kktsolver_directldl_gpu.jl b/src/kktsolvers/kktsolver_directldl_gpu.jl
index b612f8a..88d29b2 100644
--- a/src/kktsolvers/kktsolver_directldl_gpu.jl
+++ b/src/kktsolvers/kktsolver_directldl_gpu.jl
@@ -9,7 +9,7 @@ const CuVectorView{T} = SubArray{T, 1, AbstractVector{T}, Tuple{AbstractVector{I
 mutable struct GPULDLKKTSolver{T} <: AbstractKKTSolver{T}
 
     # problem dimensions
-    m::Int; n::Int; p::Int
+    m::Int; n::Int
 
     # Left and right hand sides for solves
     x::AbstractVector{T}
@@ -58,16 +58,13 @@ mutable struct GPULDLKKTSolver{T} <: AbstractKKTSolver{T}
         #YC: update GPU map, should be removed later on
         mapgpu   = GPUDataMap(P,A,cones,mapcpu)
 
-        #YC: disabled sparse expansion
-        #Need this many extra variables for sparse cones
+        #YC: disabled sparse expansion and preprocess a large second-order cone into multiple small cones
         p = pdim(mapcpu.sparse_maps)
-        if p > 0 
-            error("We don't support the augmented sparse cone at the moment!")
-        end
+        @assert(iszero(p))
 
         #updates to the diagonal of KKT will be
         #assigned here before updating matrix entries
-        dim = m + n + p
+        dim = m + n
 
         #LHS/RHS/work for iterative refinement
         x    = CuVector{T}(undef,dim)
@@ -87,7 +84,7 @@ mutable struct GPULDLKKTSolver{T} <: AbstractKKTSolver{T}
         #the indirect linear solver engine
         GPUsolver = GPUsolverT{T}(KKTgpu,x,b)
 
-        return new(m,n,p,x,b,
+        return new(m,n,x,b,
                    work1,work2,mapcpu,mapgpu,
                    Dsigns,
                    Hsblocks,
@@ -147,22 +144,6 @@ function _update_diag_values_KKT!(
     
 end
 
-
-# #scale entries in the kktsolver object using the
-# #given index into its CSC representation
-# function _scale_values!(
-#     GPUsolver::AbstractGPUSolver{T},
-#     KKT::SparseMatrixCSC{T,Ti},
-#     index::AbstractVector{Ti},
-#     scale::T
-# ) where{T,Ti}
-
-#     #Update values in the KKT matrix K
-#     @. KKT.nzval[index] *= scale
-
-# end
-
-
 function kktsolver_update!(
     kktsolver:: GPULDLKKTSolver{T},
     cones::CompositeConeGPU{T}
@@ -196,20 +177,6 @@ function _kktsolver_update_inner!(
     @. kktsolver.Hsblocks *= -one(T)
     _update_values!(GPUsolver,KKT,map.Hsblocks,kktsolver.Hsblocks)
 
-    #YC: disabled sparse expansion
-    # sparse_map_iter = Iterators.Stateful(map.sparse_maps)
-
-    # updateFcn = (index,values) -> _update_values!(GPUsolver,KKTcpu,index,values)
-    # scaleFcn  = (index,scale)  -> _scale_values!(GPUsolver,KKTcpu,index,scale)
-
-    # for cone in cones
-    #     #add sparse expansions columns for sparse cones 
-    #     if @conedispatch is_sparse_expandable(cone)
-    #         thismap = popfirst!(sparse_map_iter)
-    #         _csc_update_sparsecone_full(cone,thismap,updateFcn,scaleFcn)
-    #     end 
-    # end
-
     return _kktsolver_regularize_and_refactor!(kktsolver, GPUsolver)
 
 end
@@ -286,11 +253,10 @@ function kktsolver_setrhs!(
 ) where {T}
 
     b = kktsolver.b
-    (m,n,p) = (kktsolver.m,kktsolver.n,kktsolver.p)
+    (m,n) = (kktsolver.m,kktsolver.n)
 
     b[1:n]             .= rhsx
     b[(n+1):(n+m)]     .= rhsz
-    b[(n+m+1):(n+m+p)] .= 0
     
     CUDA.synchronize()
 
diff --git a/src/problemdata.jl b/src/problemdata.jl
index fc6981e..e9eaa98 100644
--- a/src/problemdata.jl
+++ b/src/problemdata.jl
@@ -29,6 +29,15 @@ function DefaultProblemData{T}(
 		(A, b, cones) = (A_new, b_new, cones_new)
 	end
 
+	# Preprocess large second-order cones
+	if settings.direct_solve_method === :cudss 
+		size_soc = GPUsocSize
+		num_socs, last_sizes, soc_indices, soc_starts = expand_soc(cones,size_soc)
+		P_new,q_new,A_new,b_new,cones_new =  augment_A_b(cones,P,q,A,b,size_soc,num_socs, last_sizes, soc_indices, soc_starts)
+
+		(P, q, A, b, cones) = (P_new, q_new, A_new, b_new, cones_new)
+	end
+
 	# chordal decomposition : return nothing if disabled or no decomp
 	# --------------------------------------
 	chordal_info = try_chordal_info(A,b,cones,settings)
diff --git a/src/solution.jl b/src/solution.jl
index db5c7f1..686a9f3 100644
--- a/src/solution.jl
+++ b/src/solution.jl
@@ -36,6 +36,10 @@ function solution_post_process!(
 
 	if !isnothing(data.presolver) 
 		reverse_presolve!(data.presolver, solution, variables)
+	elseif settings.direct_solve_method === :cudss && (length(solution.x) != length(variables.x))
+		lenx = length(solution.x)
+		@. solution.x = variables.x[1:lenx]		#extra entries are slack variables for large second-order cones
+		println("Solve an augmented problem that only returns value x")
 	else
 		@. solution.x = variables.x
 		@. solution.z = variables.z