From b112aa23cadf065dbdd3a09e5319369caaa8568b Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Fri, 4 Jun 2021 12:36:21 +0200
Subject: [PATCH 01/12] Optimized utils.jl compilation, removed unused

---
 src/utils/utils.jl | 131 ++-------------------------------------------
 test/runtests.jl   |   4 --
 2 files changed, 5 insertions(+), 130 deletions(-)

diff --git a/src/utils/utils.jl b/src/utils/utils.jl
index f88e428..8ff90c4 100644
--- a/src/utils/utils.jl
+++ b/src/utils/utils.jl
@@ -7,7 +7,7 @@ update_args(args::Union{Dict, NamedTuple}, update::Nothing) = args
 update_args(args::Union{Dict, NamedTuple}, update::Union{Dict, NamedTuple}) =
     merge([Dict{Symbol, Any}(zip(keys(d), values(d))) for d in (args, update)]...)
 
-estimate_density_kde(coords::Array{Float64, 2}, points::Array{Float64, 2}, bandwidth::T where T <: Real)::Vector{Float64} =
+estimate_density_kde(coords::Matrix{Float64}, points::Matrix{Float64}, bandwidth::T where T <: Real)::Vector{Float64} =
     InterpKDE(kde((coords[1,:], coords[2,:]), bandwidth=(Float64(bandwidth), Float64(bandwidth)))).itp.(points[1,:], points[2,:])
 
 function val_range(arr::AT where AT <: AbstractArray{<:Real})
@@ -102,8 +102,6 @@ function split(vector::T where T <: AbstractVector; n_parts::Int)
     return [vector[n:min(n + offset - 1, length(vector))] for n in 1:offset:length(vector)]
 end
 
-trim_mean(x::Array{T, 1} where T <: Real; prop::Float64=0.2) = mean(trim(x; prop=prop))
-
 function split(array::T where T <: AbstractVector{TV}, factor::T2 where T2 <: AbstractVector{<:Integer}; max_factor::Union{Int, Nothing}=nothing, drop_zero::Bool=false)::Array{Vector{TV}, 1} where TV
     @assert length(array) == length(factor)
     if max_factor === nothing
@@ -127,88 +125,7 @@ split(array::UnitRange{Int64}, factor::Array{Int64,1}; kwargs...) = split(collec
 split_ids(factor::Array{Int, 1}; kwargs...) = split(1:length(factor), factor; kwargs...)
 
 split(df::DataFrame, factor::Symbol; kwargs...) = split(df, Array(df[!, factor]); kwargs...)
-split(df::DataFrame, factor::Array{Int, 1}; kwargs...) = [df[ids, :] for ids in split(1:size(df, 1), factor; kwargs...)]
-
-function interpolate_linear(x::T, x_start::T, x_end::T; y_start::T=1.0, y_end::T=0.0)::Float64 where T<:Real
-    if x < x_start
-        return y_start
-    elseif x > x_end
-        return y_end
-    end
-
-    return y_start + (x - x_start) / (x_end - x_start) * (y_end - y_start)
-end
-
-function is_point_in_polygon(point::Union{Vector{T1}, Tuple{T1, T1}} where T1 <: Real, poly::Array{Vector{T2}, 1} where T2 <: Real,
-        borders::Union{Array{Vector{T3},1}, Nothing} where T3 = nothing)
-    if borders !== nothing && (borders[1][1] > point[1] || borders[1][2] > point[2] || borders[2][1] < point[1] || borders[2][2] < point[2])
-        return false
-    end
-
-    j = length(poly)
-    c = false
-    for i in 1:length(poly)
-        if ((poly[i][2] > point[2]) != (poly[j][2] > point[2])) &&
-            (point[1] < poly[i][1] + (poly[j][1] - poly[i][1]) * (point[2] - poly[i][2]) / (poly[j][2] - poly[i][2]))
-            c = !c
-        end
-
-        j = i
-    end
-
-    return c
-end
-
-"""Golden section search
-to find the minimum of f on [a,b]
-opt_func: a strictly unimodal function on [a,b]
-
-Returns: tuple with the optimal parameter and the optimal function value
-"""
-function linsearch_gs(opt_func::Function, a::T, b::T; tol=1e-3) where T<: Real
-    gr = (sqrt(5) + 1) / 2
-
-    c = b - (b - a) / gr
-    d = a + (b - a) / gr
-    while abs(c - d) > tol
-        if opt_func(c) < opt_func(d)
-            b = d
-        else
-            a = c
-        end
-        # We recompute both c and d here to avoid loss of precision which may lead to incorrect results or infinite loop
-        c = b - (b - a) / gr
-        d = a + (b - a) / gr
-    end
-
-    return (b + a) / 2, opt_func((b + a) / 2)
-end
-
-function trace_values_along_line(arr::Matrix{T}, start_x::Int, start_y::Int, end_x::Int, end_y::Int)::Vector{T} where T <: Real
-    @assert all([end_y, end_x] .<= size(arr))
-    @assert all([start_y, start_x] .<= size(arr))
-    @assert all([start_y, start_x] .> 0)
-    @assert all([end_y, end_x] .> 0)
-
-    a = (end_y - start_y) / (end_x - start_x)
-    b = end_y - a * end_x
-
-    dx = sign(end_x - start_x)
-    dy = sign(end_y - start_y)
-
-    x, y = start_x, start_y
-
-    vals = [arr[start_y, start_x]]
-    while (x != end_x) | (y != end_y)
-        if (abs((a * x + b) - (y + dy)) < abs((a * (x + dx) + b) - y)) || dx == 0
-            y += dy
-        else
-            x += dx
-        end
-        push!(vals, arr[y, x])
-    end
-    return vals
-end
+split(df::DataFrame, factor::Vector{Int}; kwargs...) = [df[ids, :] for ids in split(1:size(df, 1), factor; kwargs...)]
 
 @inline @fastmath function fmax(v1::T, v2::T) where T <: Real
     v1 > v2 ? v1 : v2
@@ -245,25 +162,6 @@ function estimate_difference_l0(m1::Matrix{Float64}, m2::Matrix{Float64}; col_we
     return max_diff, n_changed / size(m1, 2)
 end
 
-function pairwise_jaccard(values::Array{Vector{Int}, 1}, min_dist::Float64=0.0001)::Matrix{Float64}
-    dist_mat = zeros(length(values), length(values))
-    for i1 in 1:length(values)
-        s1 = values[i1]
-        for i2 in (i1+1):length(values)
-            s2 = values[i2]
-            inter_len = 0
-            for v in s1
-                if v in s2
-                    inter_len += 1
-                end
-            end
-            dist_mat[i1, i2] = dist_mat[i2, i1] = fmax(1.0 - length(inter_len) / (length(s1) + length(s2) - inter_len), min_dist)
-        end
-    end
-
-    return dist_mat
-end
-
 ### Statistics
 
 @inline function fsample(w::Vector{Float64})::Int
@@ -284,7 +182,7 @@ end
 
 @inline @inbounds fsample(arr::Vector{Int}, w::Vector{Float64})::Int = arr[fsample(w)]
 
-function wmean(values::Vector{T} where T <: Real, weights::T1 where T1 <: AbstractVector{Float64}; non_zero_ids::T2 where T2 <: AbstractArray{Int} = 1:length(values))
+function wmean(values::Vector{Float64}, weights::Vector{Float64}; non_zero_ids::Union{UnitRange{Int}, Vector{Int}}=1:length(values))
     s, ws = 0.0, 0.0
     for i in non_zero_ids
         s += values[i] * weights[i]
@@ -294,7 +192,7 @@ function wmean(values::Vector{T} where T <: Real, weights::T1 where T1 <: Abstra
     return s / ws
 end
 
-function wmean_std(values::Vector{T0} where T0 <: Real, weights::T1 where T1 <: AbstractVector{Float64}; non_zero_ids::T2 where T2 <: AbstractArray{Int} = 1:length(values))
+function wmean_std(values::Vector{Float64}, weights::Vector{Float64}; non_zero_ids::Union{UnitRange{Int}, Vector{Int}}=1:length(values))
     m = wmean(values, weights; non_zero_ids=non_zero_ids)
     s, ws = 0.0, 0.0
     for i in non_zero_ids
@@ -306,26 +204,7 @@ function wmean_std(values::Vector{T0} where T0 <: Real, weights::T1 where T1 <:
     return m, sqrt(s / ws)
 end
 
-function upscale(image::T where T <: AbstractMatrix{TR}, ratio::Int64) where TR <: Real
-    res_img = zeros(TR, size(image) .* ratio)
-    ct = 1
-    for c in 1:size(image, 2)
-        for m1 in 1:ratio
-            rt = 1
-            for r in 1:size(image, 1)
-                for m2 in 1:ratio
-                    res_img[rt, ct] = image[r, c]
-                    rt += 1
-                end
-            end
-            ct += 1
-        end
-    end
-
-    return res_img
-end
-
-function estimate_hist(vec::Vector{<:Real}, weights=FrequencyWeights(ones(length(vec))); 
+function estimate_hist(vec::Vector{<:Real}, weights=FrequencyWeights(ones(length(vec)));
         ext_cols::NamedTuple=NamedTuple(), rel_width::Float64=0.9, normalize::Union{Bool, Symbol}=false, center=true, bins=nothing, kwargs...)
     hf = (bins === nothing) ? fit(Histogram, vec, weights; kwargs...) : fit(Histogram, vec, weights, bins; kwargs...)
     diffs = rel_width * diff(hf.edges[1])[1]
diff --git a/test/runtests.jl b/test/runtests.jl
index de3625f..68bf1af 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -40,10 +40,6 @@ end
             @test all(chull .== [0 0 2 2 0; 0 2 2 0 0])
             @test B.area(chull) ≈ 4.0
         end
-
-        @testset "utils" begin
-            @test all([B.interpolate_linear(x, 0.0, 1.0; y_start=0.0, y_end=1.0) ≈ x for x in range(0, 1.0, length=50)])
-        end
     end
 
     @testset "distributions" begin

From d53b6d274870ad292601c66f36390d525e221cb2 Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Fri, 4 Jun 2021 13:01:23 +0200
Subject: [PATCH 02/12] Refactored models and utils

---
 src/models/BmmData.jl       | 16 ++++++++--------
 src/models/Component.jl     |  4 ++--
 src/models/InitialParams.jl |  2 +-
 src/utils/convex_hull.jl    |  6 +++---
 src/utils/logging.jl        |  4 ++--
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/models/BmmData.jl b/src/models/BmmData.jl
index c8b9cb1..36a2f62 100644
--- a/src/models/BmmData.jl
+++ b/src/models/BmmData.jl
@@ -63,9 +63,9 @@ mutable struct BmmData{L}
     - `distribution_sampler::Component`:
     - `assignment::Array{Int, 1}`:
     """
-    function BmmData(components::Array{Component{N}, 1}, x::DataFrame, adjacent_points::Array{Vector{Int}, 1}, adjacent_weights::Array{Vector{Float64}, 1}, 
-                     assignment::Vector{Int}, distribution_sampler::Component{N}; real_edge_weight::Float64=1.0, k_neighbors::Int=20, 
-                     cluster_penalty_mult::Float64=0.25, use_gene_smoothing::Bool=true, prior_seg_confidence::Float64=0.5, 
+    function BmmData(components::Array{Component{N}, 1}, x::DataFrame, adjacent_points::Array{Vector{Int}, 1}, adjacent_weights::Array{Vector{Float64}, 1},
+                     assignment::Vector{Int}, distribution_sampler::Component{N}; real_edge_weight::Float64=1.0, k_neighbors::Int=20,
+                     cluster_penalty_mult::Float64=0.25, use_gene_smoothing::Bool=true, prior_seg_confidence::Float64=0.5,
                      min_nuclei_frac::Float64=0.1, mrf_strength::Float64=0.1, na_genes::Vector{Int}=Int[]) where N
         @assert maximum(assignment) <= length(components)
         @assert minimum(assignment) >= 0
@@ -93,11 +93,11 @@ mutable struct BmmData{L}
         cluster_per_molecule = :cluster in propertynames(x) ? x.cluster : Int[]
         self = new{N}(
             x, p_data, comp_data, confidence(x), cluster_per_molecule, Int[], nuclei_probs,
-            adjacent_points, adjacent_weights, real_edge_weight, position_knn_tree, knn_neighbors, 
-            components, deepcopy(distribution_sampler), assignment, length(components), 0.0, 
+            adjacent_points, adjacent_weights, real_edge_weight, position_knn_tree, knn_neighbors,
+            components, deepcopy(distribution_sampler), assignment, length(components), 0.0,
             Int[],
             Int[], Int[], # prior segmentation info
-            Dict{Symbol, Any}(), Dict{Symbol, Any}(), Int[], 
+            Dict{Symbol, Any}(), Dict{Symbol, Any}(), Int[],
             prior_seg_confidence, cluster_penalty_mult, use_gene_smoothing, min_nuclei_frac, mrf_strength
         )
 
@@ -139,7 +139,7 @@ function position_data(df::AbstractDataFrame)::Matrix{Float64}
     if (:z in propertynames(df))
         return copy(Matrix{Float64}(df[:, [:x, :y, :z]])')
     end
-    
+
     return copy(Matrix{Float64}(df[:, [:x, :y]])')
 end
 position_data(data::BmmData)::Matrix{Float64} = data.position_data
@@ -217,7 +217,7 @@ function get_cell_qc_df(segmented_df::DataFrame, cell_assignment::Vector{Int}=se
 
     if dapi_arr !== nothing # TODO: forward DAPI from CLI
         brightness_per_mol = staining_value_per_transcript(segmented_df, dapi_arr);
-        df[!, :mean_dapi_brightness] = trim_mean.(split(brightness_per_mol, cell_assignment .+ 1)[2:end])
+        df[!, :mean_dapi_brightness] = mean.(trim.(split(brightness_per_mol, cell_assignment .+ 1)[2:end]; prop=0.2))
     end
 
     return df
diff --git a/src/models/Component.jl b/src/models/Component.jl
index a2f8dbb..2a8acf1 100644
--- a/src/models/Component.jl
+++ b/src/models/Component.jl
@@ -53,7 +53,7 @@ mutable struct Component{N}
 end
 
 # # This function would be useful when processing QC score as another kind of confidences
-# function maximize!(c::Component{N} where N, pos_data::T1 where T1 <: AbstractMatrix{Float64}, comp_data::T2 where T2 <: Union{AbstractVector{Int}, AbstractVector{Union{Int, Missing}}}, 
+# function maximize!(c::Component{N} where N, pos_data::T1 where T1 <: AbstractMatrix{Float64}, comp_data::T2 where T2 <: Union{AbstractVector{Int}, AbstractVector{Union{Int, Missing}}},
 #         conf_data::T3 where T3 <: AbstractVector{Float64}; nuclei_probs::Union{<:AbstractVector{Float64}, Nothing}=nothing, min_nuclei_frac::Float64=0.1)
 #     c.n_samples = size(pos_data, 2) # TODO: need to replace it with confidences, but for that I need to re-write all prior segmentation code to work with confidences as well. Also may need to adjust some other parts
 #     maximize!(c.composition_params, comp_data, conf_data);
@@ -80,7 +80,7 @@ end
 #     return c
 # end
 
-function maximize!(c::Component{N} where N, pos_data::T1 where T1 <: AbstractMatrix{Float64}, comp_data::T2 where T2 <: Union{AbstractVector{Int}, AbstractVector{Union{Int, Missing}}}; 
+function maximize!(c::Component{N} where N, pos_data::T1 where T1 <: AbstractMatrix{Float64}, comp_data::T2 where T2 <: Union{AbstractVector{Int}, AbstractVector{Union{Int, Missing}}};
         nuclei_probs::Union{<:AbstractVector{Float64}, Nothing}=nothing, min_nuclei_frac::Float64=0.1)
     c.n_samples = size(pos_data, 2)
     maximize!(c.composition_params, comp_data);
diff --git a/src/models/InitialParams.jl b/src/models/InitialParams.jl
index 5bcc526..54d0ec4 100644
--- a/src/models/InitialParams.jl
+++ b/src/models/InitialParams.jl
@@ -8,6 +8,6 @@ struct InitialParams{L}
     InitialParams(centers::Matrix{<:Real}, covs::Float64, assignment::Vector{Int64}) where N =
         new{N}(deepcopy(centers), [CovMat{N}(diagm(0 => (ones(N) .* covs))) for i in 1:size(centers, 1)], deepcopy(assignment))
 
-    InitialParams(centers::Array{T, 2} where T <: Real, covs::Array{<:CovMat{N}, 1}, assignment::Array{Int64,1}) where N =
+    InitialParams(centers::Matrix{<:Real}, covs::Array{<:CovMat{N}, 1}, assignment::Array{Int64,1}) where N =
         new{N}(deepcopy(centers), deepcopy(covs), deepcopy(assignment))
 end
\ No newline at end of file
diff --git a/src/utils/convex_hull.jl b/src/utils/convex_hull.jl
index bc4b063..5a09ee3 100644
--- a/src/utils/convex_hull.jl
+++ b/src/utils/convex_hull.jl
@@ -1,5 +1,5 @@
-convex_hull(a::Array{T, 2} where T<:Real) = convex_hull(collect.(eachcol(a)))
-function convex_hull(a::Array{Array{T,1},1} where T<:Real)
+convex_hull(a::Matrix{<:Real}) = convex_hull(collect.(eachcol(a)))
+function convex_hull(a::Array{Vector{<:Real},1})
     cw(a, b, c) = (a[1]*(b[2]-c[2])+b[1]*(c[2]-a[2])+c[1]*(a[2]-b[2]) < 0);
     ccw(a, b, c) = (a[1]*(b[2]-c[2])+b[1]*(c[2]-a[2])+c[1]*(a[2]-b[2]) > 0);
 
@@ -42,4 +42,4 @@ function area(polygon::Matrix{<:Real})
     end
 
    return abs(area) / 2;
-end
\ No newline at end of file
+end
diff --git a/src/utils/logging.jl b/src/utils/logging.jl
index 68887b0..ad8be5a 100644
--- a/src/utils/logging.jl
+++ b/src/utils/logging.jl
@@ -10,7 +10,7 @@ struct DoubleLogger <: AbstractLogger
     message_limits::Dict{Any,Int}
     force_flush::Bool
 end
-DoubleLogger(cli_stream::IO=stderr, file_stream::Union{IO, Nothing}=nothing, level=Info; force_flush::Bool=false) = 
+DoubleLogger(cli_stream::IO=stderr, file_stream::Union{IO, Nothing}=nothing, level=Info; force_flush::Bool=false) =
     DoubleLogger(cli_stream, file_stream, level, Dict{Any,Int}(), force_flush)
 shouldlog(logger::DoubleLogger, level, _module, group, id) = get(logger.message_limits, id, 1) > 0
 
@@ -46,7 +46,7 @@ function log_message(stream::IO, level, message, _module, filepath, line; force_
 
     msglines[2:end] = (" " ^ (length(prefix) - 1)) .* msglines[2:end]
     if level !== Info
-        source_address = " " * something("$_module", "nothing") * " " * 
+        source_address = " " * something("$_module", "nothing") * " " *
             something("$filepath", "nothing") * ":" * something("$line", "nothing")
         push!(msglines, source_address)
     end

From 36b4bebed678e5c38a6c58e7e57f9b10293669f7 Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Fri, 4 Jun 2021 14:22:42 +0200
Subject: [PATCH 03/12] Refactored data_processing and optimized its
 compilation

---
 src/data_processing/diagnostic_plots.jl         | 14 +++++++-------
 src/data_processing/initialization.jl           | 12 ++++++------
 src/data_processing/neighborhood_composition.jl |  9 ++++-----
 src/data_processing/noise_estimation.jl         |  4 ++--
 src/data_processing/triangulation.jl            |  4 ++--
 src/data_processing/umap_wrappers.jl            |  6 +++---
 6 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/src/data_processing/diagnostic_plots.jl b/src/data_processing/diagnostic_plots.jl
index 168ea5b..bb43e0d 100644
--- a/src/data_processing/diagnostic_plots.jl
+++ b/src/data_processing/diagnostic_plots.jl
@@ -6,12 +6,12 @@ function plot_noise_estimation_diagnostics(edge_lengths::Vector{Float64}, confid
         x_vals = range(0, x_max, length=1000)
         n1 = sum(confidences);
         n2 = length(confidences) - n1;
-        
+
         p_df = estimate_hist(edge_lengths[edge_lengths .< x_max], normalize=true, nbins=bins)
         p_df[!, :intra] = n1 / (n1 + n2) .* pdf.(d1, p_df.s)
         p_df[!, :bg] = n2 / (n1 + n2) .* pdf.(d2, p_df.s)
-        
-        return p_df |> 
+
+        return p_df |>
         @vlplot(x={:s, title="Distance to $(confidence_nn_id)'th nearest neighbor"}, title=title, width=400, height=300) +
         @vlplot(:bar, x2=:e, y={:h, title="Density"}, color={datum="Observed", scale={scheme="category10"}, legend={title="Distribution"}}) +
         @vlplot({:line, size=linewidth}, y=:bg, color={datum="Background"}) +
@@ -44,10 +44,10 @@ function plot_gene_structure(df_spatial::DataFrame, gene_names::Vector, confiden
 
     return p_df |>
     @vlplot(
-        x={:x, scale={domain=val_range(p_df.x)}, title="UMAP-1"}, 
-        y={:y, scale={domain=val_range(p_df.y)}, title="UMAP-2"}, 
-        size={:size, scale={range=[5, 10]}, legend=false}, 
-        tooltip={:gene, type="nominal"}, 
+        x={:x, scale={domain=val_range(p_df.x)}, title="UMAP-1"},
+        y={:y, scale={domain=val_range(p_df.y)}, title="UMAP-2"},
+        size={:size, scale={range=[5, 10]}, legend=false},
+        tooltip={:gene, type="nominal"},
         width=600, height=600, title="Gene local structure",
         config={axis={grid=false, ticks=false, ticklabels=false, labels=false}}
     ) +
diff --git a/src/data_processing/initialization.jl b/src/data_processing/initialization.jl
index 123d334..3fb2e96 100644
--- a/src/data_processing/initialization.jl
+++ b/src/data_processing/initialization.jl
@@ -62,12 +62,12 @@ end
 
 # Initialize cell positions
 
-function position_data_by_assignment(pos_data::T where T<: AbstractMatrix{<:Real}, assignment::Vector{<:Integer})
+function position_data_by_assignment(pos_data::Matrix{Float64}, assignment::Vector{Int})
     filt_ids = findall(assignment .> 0)
     return [pos_data[:, ids] for ids in split(filt_ids, assignment[filt_ids])]
 end
 
-function covs_from_assignment(pos_data::T where T<: AbstractMatrix{<:Real}, assignment::Vector{<:Integer}; min_size::Int=1)::Array{<:CovMat, 1}
+function covs_from_assignment(pos_data::Matrix{Float64}, assignment::Vector{Int}; min_size::Int=1)::Array{<:CovMat, 1}
     pos_data_by_assignment = position_data_by_assignment(pos_data, assignment)
     CM, MV = (size(pos_data, 1) == 2) ? (CovMat{2}, MeanVec{2}) : (CovMat{3}, MeanVec{3})
 
@@ -84,8 +84,8 @@ end
 cell_centers_uniformly(spatial_df::DataFrame, args...; kwargs...) =
     cell_centers_uniformly(position_data(spatial_df), args..., (:confidence in propertynames(spatial_df)) ? spatial_df.confidence : nothing; kwargs...)
 
-function cell_centers_uniformly(pos_data::T where T<: AbstractMatrix{<:Real}, n_clusters::Int,
-        confidences::Union{Vector{Float64}, Nothing}=nothing; scale::Union{<:Real, Nothing})
+function cell_centers_uniformly(pos_data::Matrix{Float64}, n_clusters::Int,
+        confidences::Union{Vector{Float64}, Nothing}=nothing; scale::Union{Float64, Nothing})
     n_clusters = min(n_clusters, size(pos_data, 2))
 
     cluster_centers = pos_data[:, select_ids_uniformly(pos_data', confidences; n=n_clusters, confidence_threshold=0.25)]
@@ -95,10 +95,10 @@ function cell_centers_uniformly(pos_data::T where T<: AbstractMatrix{<:Real}, n_
     if scale === nothing
         covs = covs_from_assignment(pos_data, cluster_labels)
     elseif size(pos_data, 1) == 2
-        scale = Float64(scale) ^ 2
+        scale = scale ^ 2
         covs = [CovMat{2}(diagm(0 => (ones(2) .* scale))) for i in 1:size(cluster_centers, 2)]
     elseif size(pos_data, 1) == 3
-        scale = Float64(scale) ^ 2
+        scale = scale ^ 2
         covs = [CovMat{3}(diagm(0 => (ones(3) .* scale))) for i in 1:size(cluster_centers, 2)]
     else
         error("Unexpected number of dimensions: $(size(pos_data, 1))")
diff --git a/src/data_processing/neighborhood_composition.jl b/src/data_processing/neighborhood_composition.jl
index c3e2c3f..fccb5a1 100644
--- a/src/data_processing/neighborhood_composition.jl
+++ b/src/data_processing/neighborhood_composition.jl
@@ -1,9 +1,9 @@
 using Base.Threads
 
-neighborhood_count_matrix(data::Union{BmmData, T} where T <: AbstractDataFrame, k::Int; kwargs...) =
+neighborhood_count_matrix(data::Union{BmmData, DataFrame}, k::Int; kwargs...) =
     neighborhood_count_matrix(position_data(data), composition_data(data), k; kwargs...)
 
-function neighborhood_count_matrix(pos_data::Matrix{<:Real}, genes::Vector{Int}, k::Int; confidences::Union{Vector{Float64}, Nothing}=nothing,
+function neighborhood_count_matrix(pos_data::Matrix{Float64}, genes::Vector{Int}, k::Int; confidences::Union{Vector{Float64}, Nothing}=nothing,
     n_genes::Int=maximum(genes), normalize_by_dist::Bool=true, normalize::Bool=true)
     if k < 3
         @warn "Too small value of k: $k. Setting it to 3."
@@ -18,7 +18,6 @@ function neighborhood_count_matrix(pos_data::Matrix{<:Real}, genes::Vector{Int},
         @warn "`confidences` are not supported with `normalize=true`. Ignoring them."
         confidences = nothing
     end
-        
 
     k = min(k, size(pos_data, 2))
 
@@ -117,9 +116,9 @@ function gene_composition_colors!(embedding::Matrix{Float64}; lrange::Tuple{<:Re
     return vec(mapslices(col -> Colors.Lab(col...), embedding, dims=1))
 end
 
-function gene_composition_colors(df_spatial::T where T <: AbstractDataFrame, k::Int; kwargs...)
+function gene_composition_colors(df_spatial::DataFrame, k::Int; kwargs...)
     neighb_cm = neighborhood_count_matrix(df_spatial, k);
     confidence = (:confidence in propertynames(df_spatial)) ? df_spatial.confidence : ones(size(df_spatial, 1))
     transformation = gene_composition_transformation(neighb_cm, confidence)
     return gene_composition_colors(neighb_cm, transformation; kwargs...)
-end
\ No newline at end of file
+end
diff --git a/src/data_processing/noise_estimation.jl b/src/data_processing/noise_estimation.jl
index 6be9eef..e536249 100644
--- a/src/data_processing/noise_estimation.jl
+++ b/src/data_processing/noise_estimation.jl
@@ -1,8 +1,8 @@
-@views maximize_noise_distributions(edge_lengths::Vector{Float64}, assignment_probs::Matrix{Float64}; updating_ids::T2 where T2 <: AbstractArray{Int} = 1:length(edge_lengths)) =
+@views maximize_noise_distributions(edge_lengths::Vector{Float64}, assignment_probs::Matrix{Float64}; updating_ids::Vector{Int}) =
     (Normal(wmean_std(edge_lengths, assignment_probs[:,i], non_zero_ids=updating_ids)...) for i in 1:2)
 
 function expect_noise_probabilities!(assignment_probs::Matrix{Float64}, d1::Normal, d2::Normal, edge_lengths::Vector{Float64},
-        adjacent_points::Vector{Vector{Int}}, adjacent_weights::Vector{Vector{Float64}}; updating_ids::T where T <: AbstractVector{Int} = 1:length(edge_lengths),
+        adjacent_points::Vector{Vector{Int}}, adjacent_weights::Vector{Vector{Float64}}; updating_ids::Vector{Int},
         min_confidence::Union{Vector{Float64}, Nothing}=nothing)
     norm_denses = (pdf.(d1, edge_lengths), pdf.(d2, edge_lengths));
     n1 = sum(assignment_probs[:, 1])
diff --git a/src/data_processing/triangulation.jl b/src/data_processing/triangulation.jl
index 6676268..0ce6616 100644
--- a/src/data_processing/triangulation.jl
+++ b/src/data_processing/triangulation.jl
@@ -24,7 +24,7 @@ getx(p::IndexedPoint2D) = p._x
 gety(p::IndexedPoint2D) = p._y
 geti(p::IndexedPoint2D) = p._index
 
-function adjacency_list(points::AbstractArray{T, 2} where T <: Real; filter::Bool=true, n_mads::T2 where T2 <: Real =2, k_adj::Int=5,
+function adjacency_list(points::Matrix{<:Real}; filter::Bool=true, n_mads::Float64=2.0, k_adj::Int=5,
         adjacency_type::Symbol=:triangulation, distance::TD where TD <: Distances.SemiMetric = Euclidean())
     if size(points, 1) == 3
         (adjacency_type == :knn) || @warn "Only k-nn random field is supported for 3D data"
@@ -81,7 +81,7 @@ end
 
 adjacency_list(spatial_df::DataFrame; kwargs...) = adjacency_list(position_data(spatial_df); kwargs...)
 
-function connected_components(adjacent_points::Array{Array{Int, 1}, 1})
+function connected_components(adjacent_points::Array{Vector{Int}, 1})
     g = LightGraphs.SimpleGraph(length(adjacent_points));
     for (v1, vs) in enumerate(adjacent_points)
         for v2 in vs
diff --git a/src/data_processing/umap_wrappers.jl b/src/data_processing/umap_wrappers.jl
index c5d4ba3..429ff4b 100644
--- a/src/data_processing/umap_wrappers.jl
+++ b/src/data_processing/umap_wrappers.jl
@@ -23,15 +23,15 @@ function fit(::Type{UmapFit}, x::Array{Float64, 2}, pca::MultivariateStats.PCA;
     return UmapFit(embedding, nn_tree, pca, nn_interpolate)
 end
 
-fit(::Type{UmapFit}, x::Array{Float64, 2}; n_pcs::Int=15, kwargs...)::UmapFit =
+fit(::Type{UmapFit}, x::Matrix{Float64}; n_pcs::Int=15, kwargs...)::UmapFit =
     fit(UmapFit, x, fit(MultivariateStats.PCA, x, maxoutdim=n_pcs); kwargs...)
 
-function transform(transformation::UmapFit, x::Array{Float64, 2}; dist_offset::Float64=1e-10)::Array{Float64, 2}
+function transform(transformation::UmapFit, x::Matrix{Float64}; dist_offset::Float64=1e-10)::Matrix{Float64}
     x_pc = transform(transformation.pca_transform, x)
 
     # This transformation is much faster than the implementation from UMAP.jl, even for input dimensionality = 50.
     # Probably, because of the slow NearestNeighborDescent package.
     indices, distances = knn(transformation.nn_tree, x_pc, transformation.nn_interpolate)
-    res = [mapslices(x -> wmean(x, 1 ./ (dists .+ dist_offset)), transformation.embedding[:, ids], dims=2) for (ids, dists) in zip(indices, distances)]
+    res = [mapslices(v -> wmean(v, 1 ./ (dists .+ dist_offset)), transformation.embedding[:, ids], dims=2) for (ids, dists) in zip(indices, distances)]
     return hcat(res...)
 end
\ No newline at end of file

From 0de67290409d22ef0df4e08b060e37885578d66a Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Fri, 4 Jun 2021 17:05:26 +0200
Subject: [PATCH 04/12] Fixed generic types

---
 src/utils/convex_hull.jl | 4 ++--
 src/utils/utils.jl       | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/utils/convex_hull.jl b/src/utils/convex_hull.jl
index 5a09ee3..dda508f 100644
--- a/src/utils/convex_hull.jl
+++ b/src/utils/convex_hull.jl
@@ -1,5 +1,5 @@
-convex_hull(a::Matrix{<:Real}) = convex_hull(collect.(eachcol(a)))
-function convex_hull(a::Array{Vector{<:Real},1})
+convex_hull(a::Matrix{<: Real}) = convex_hull(collect.(eachcol(a)))
+function convex_hull(a::Array{Vector{T},1} where T<: Real)
     cw(a, b, c) = (a[1]*(b[2]-c[2])+b[1]*(c[2]-a[2])+c[1]*(a[2]-b[2]) < 0);
     ccw(a, b, c) = (a[1]*(b[2]-c[2])+b[1]*(c[2]-a[2])+c[1]*(a[2]-b[2]) > 0);
 
diff --git a/src/utils/utils.jl b/src/utils/utils.jl
index 8ff90c4..b81623c 100644
--- a/src/utils/utils.jl
+++ b/src/utils/utils.jl
@@ -182,7 +182,7 @@ end
 
 @inline @inbounds fsample(arr::Vector{Int}, w::Vector{Float64})::Int = arr[fsample(w)]
 
-function wmean(values::Vector{Float64}, weights::Vector{Float64}; non_zero_ids::Union{UnitRange{Int}, Vector{Int}}=1:length(values))
+function wmean(values::Vector{Float64}, weights::T where T <: AbstractVector{Float64}; non_zero_ids::Union{UnitRange{Int}, Vector{Int}}=1:length(values))
     s, ws = 0.0, 0.0
     for i in non_zero_ids
         s += values[i] * weights[i]
@@ -192,7 +192,7 @@ function wmean(values::Vector{Float64}, weights::Vector{Float64}; non_zero_ids::
     return s / ws
 end
 
-function wmean_std(values::Vector{Float64}, weights::Vector{Float64}; non_zero_ids::Union{UnitRange{Int}, Vector{Int}}=1:length(values))
+function wmean_std(values::Vector{Float64}, weights::T where T <: AbstractVector{Float64}; non_zero_ids::Union{UnitRange{Int}, Vector{Int}}=1:length(values))
     m = wmean(values, weights; non_zero_ids=non_zero_ids)
     s, ws = 0.0, 0.0
     for i in non_zero_ids

From ada5143f66a9b3f60e6bca34440ba35e3f3eab46 Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Fri, 4 Jun 2021 17:05:41 +0200
Subject: [PATCH 05/12] Fixed tests, added 3d run

---
 test/runtests.jl | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 68bf1af..9a63aa7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -82,7 +82,7 @@ end
             @test !any(i in bm_data.adjacent_points[i] for i in 1:length(bm_data.adjacent_points))
 
             for i in 5:10:55
-                init_params = B.cell_centers_uniformly(df, i; scale=10)
+                init_params = B.cell_centers_uniformly(df, i; scale=10.0)
                 @test size(init_params.centers, 1) == i
                 @test size(init_params.centers, 2) == 2
                 @test length(init_params.covs) == i
@@ -167,7 +167,7 @@ end
 
         @testset "synthetic_run" begin
             seed!(42)
-            for i in 1:6
+            for it in 1:6
                 n_components = 10;
                 n_genes = 20
                 scale = 0.2;
@@ -187,7 +187,11 @@ end
                 df_spatial = vcat(df_spatial, DataFrame(:x => rand(noise_size) * frame_size[1], :y => rand(noise_size) * frame_size[2],
                     :gene => rand(1:n_genes, noise_size), :cell => 0));
 
-                if i > 4
+                if it % 2 == 0
+                    df_spatial[!, :z] = rand(1:3, size(df_spatial, 1)) .* scale ./ 10
+                end
+
+                if it > 4
                     df_spatial[!, :cluster] = df_spatial.cell .+ 1
                 end
                 bm_data_arr = B.initialize_bmm_data(df_spatial; scale=scale, scale_std="5%", min_molecules_per_cell=10);

From 572ec095a4313d407ce11b1e7d1adfc0443d32cd Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Tue, 29 Jun 2021 12:57:57 +0200
Subject: [PATCH 06/12] Updated Dockerfile and build.jl

---
 bin/build.jl      |  5 +++--
 docker/Dockerfile | 30 ++++--------------------------
 2 files changed, 7 insertions(+), 28 deletions(-)

diff --git a/bin/build.jl b/bin/build.jl
index e0f5c60..9530540 100755
--- a/bin/build.jl
+++ b/bin/build.jl
@@ -1,9 +1,10 @@
 #! /usr/bin/env julia
 
 using PackageCompiler
+import Pkg
 
 out_path = length(ARGS) > 1 ? ARGS[1] : "Baysor";
 baysor_path = length(ARGS) > 2 ? ARGS[2] : dirname(@__DIR__);
 
-Base.LOAD_PATH .= baysor_path
-create_app(baysor_path, out_path; precompile_execution_file="$(baysor_path)/bin/build.jl")
\ No newline at end of file
+Pkg.activate(baysor_path)
+create_app(baysor_path, out_path; precompile_execution_file="$(baysor_path)/bin/precompile.jl")
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 5a57005..7adc810 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -11,28 +11,6 @@ RUN pip3 install jupyterlab numpy scipy matplotlib seaborn pandas sklearn scikit
 
 RUN pip3 install -Iv six==1.12.0
 
-RUN \
-    jupyter nbextensions_configurator enable && \
-    jupyter contrib nbextension install
-
-RUN \
-    jupyter nbextension enable codefolding/main && \
-    jupyter nbextension enable codefolding/edit && \
-    jupyter nbextension enable collapsible_headings/main && \
-    jupyter nbextension enable notify/notify && \
-    jupyter nbextension enable ruler/main && \
-    jupyter nbextension enable table_beautifier/main && \
-    jupyter nbextension enable comment-uncomment/main && \
-    jupyter nbextension enable hide_header/main && \
-    jupyter nbextension enable livemdpreview/livemdpreview && \
-    jupyter nbextension enable spellchecker/main && \
-    jupyter nbextension enable execution_dependencies/execution_dependencies && \
-    jupyter nbextension enable freeze/main && \
-    jupyter nbextension enable tree-filter/index && \
-    jupyter nbextension enable init_cell/main && \
-    jupyter nbextension enable move_selected_cells/main && \
-    jupyter nbextension enable toc2/main
-
 RUN julia -e 'using Pkg; Pkg.add("IJulia"); Pkg.build(); using IJulia;'
 
 ### jupyter notebook --no-browser --port=8989 --ip=0.0.0.0 --allow-root ./
@@ -40,13 +18,13 @@ RUN julia -e 'using Pkg; Pkg.add("IJulia"); Pkg.build(); using IJulia;'
 ## Julia Baysor envitonment
 
 RUN julia -e 'using Pkg; Pkg.add("PackageCompiler")'
-RUN julia -e 'using Pkg; Pkg.add(PackageSpec(url="https://github.com/hms-dbmi/Baysor.git")); Pkg.build(); using Baysor;'
+RUN julia -e 'using Pkg; Pkg.add(PackageSpec(url="https://github.com/hms-dbmi/Baysor.git")); Pkg.build();'
 
-# See https://discourse.julialang.org/t/sysimage-incompatible-between-intel-and-amd-x86-64/42716/2 for cpu_target flags
-RUN julia -e 'using PackageCompiler; import Baysor; create_sysimage(:Baysor; precompile_execution_file="$(dirname(pathof(Baysor)))/../bin/build.jl", replace_default=true, cpu_target="generic")'
+RUN julia -e 'import Baysor, Pkg; Pkg.activate(dirname(dirname(pathof(Baysor)))); Pkg.instantiate();'
+RUN julia -e 'using PackageCompiler; import Baysor, Pkg; Pkg.activate(dirname(dirname(pathof(Baysor)))); create_sysimage(:Baysor; precompile_execution_file="$(dirname(pathof(Baysor)))/../bin/precompile.jl", replace_default=true, cpu_target="generic")'
 
 RUN \
-    echo "#!/usr/bin/env julia\n\nimport Baysor\nBaysor.run_cli()" >> /bin/baysor && \
+    printf "#!/usr/bin/env julia\n\nimport Baysor\nBaysor.run_cli()" >> /bin/baysor && \
     chmod +x /bin/baysor
 
 ENTRYPOINT ["/bin/bash"]

From 864ba5521b69e2c8ccf5fd7237e6973589b2199d Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Tue, 29 Jun 2021 13:14:52 +0200
Subject: [PATCH 07/12] Fixed nuclei weighting

- Removed unused weighted functions
- `nuclei_probs` only affect mean, but not covariance
---
 CHANGELOG.md                  |  7 +++++++
 src/distributions/MvNormal.jl | 30 ++++++------------------------
 src/models/Component.jl       | 34 +++-------------------------------
 test/runtests.jl              |  2 +-
 4 files changed, 17 insertions(+), 56 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6cfbd23..7a1ec73 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## Upcoming
+
+### Changed
+
+- Slightly optimized compilation time
+- Minor updates of core formulas
+
 ## [0.5.0] — 2021-06-03
 
 ### Changed
diff --git a/src/distributions/MvNormal.jl b/src/distributions/MvNormal.jl
index ad1db2b..e053b60 100644
--- a/src/distributions/MvNormal.jl
+++ b/src/distributions/MvNormal.jl
@@ -19,7 +19,7 @@ struct MvNormalF{L}
 
         return MvNormalF(MeanVec{3}(μ))
     end
-        
+
     function MvNormalF(μ::T1 where T1 <: AbstractVector{Float64}, Σ::T2 where T2 <: AbstractMatrix{Float64})
         if length(μ) == 2
             return MvNormalF(MeanVec{2}(μ), CovMat{2}(Σ))
@@ -68,7 +68,7 @@ end
 pdf(d::MvNormalF, x::SVector{N, Float64} where N) = exp(log_pdf(d, x))
 shape(d::MvNormalF) = Matrix(d.Σ)
 
-function estimate_sample_cov!(Σ::CovMat{N}, x::T where T <: AbstractMatrix{Float64}, weights::Nothing=nothing; μ::MeanVec{N}) where N
+function estimate_sample_cov!(Σ::CovMat{N}, x::T where T <: AbstractMatrix{Float64}; μ::MeanVec{N}) where N
     # https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices#Intrinsic_expectation
     Σ .= 0.0
     for i in 1:size(x, 2)
@@ -83,39 +83,21 @@ function estimate_sample_cov!(Σ::CovMat{N}, x::T where T <: AbstractMatrix{Floa
     return Σ
 end
 
-function estimate_sample_cov!(Σ::CovMat{N}, x::T where T <: AbstractMatrix{Float64}, weights::T2 where T2 <: AbstractVector{<:Real}; μ::MeanVec{N}) where N
-    # https://en.wikipedia.org/wiki/Estimation_of_covariance_matrices#Intrinsic_expectation
-    Σ .= 0.0
-    sum_w = 0.0
-    for i in 1:size(x, 2)
-        w = fmax(weights[i], 0.01)
-        for c in 1:size(Σ, 1)
-            for r in 1:size(Σ, 1)
-                Σ[r, c] += w * (x[c, i] - μ[c]) * (x[r, i] - μ[r])
-            end
-        end
-        sum_w += w
-    end
-
-    Σ ./= sum_w
-    return Σ
-end
-
-function maximize!(dist::MvNormalF, x::T, confidences::T2 where T2 <: Union{AbstractVector{<:Real}, Nothing}=nothing)::MvNormalF where T <: AbstractMatrix{Float64}
+function maximize!(dist::MvNormalF, x::T; center_probs::T2 where T2 <: Union{AbstractVector{<:Real}, Nothing}=nothing)::MvNormalF where T <: AbstractMatrix{Float64}
     if size(x, 2) == 0
         return dist
     end
 
-    if confidences === nothing
+    if center_probs === nothing
         mean!(dist.μ, x)
     else
-        wmean!(dist.μ, x, confidences)
+        wmean!(dist.μ, x, center_probs)
     end
     if size(x, 2) <= length(dist.μ)
         return dist
     end
 
-    estimate_sample_cov!(dist.Σ, x, confidences; μ=dist.μ)
+    estimate_sample_cov!(dist.Σ, x; μ=dist.μ)
     return dist
 end
 
diff --git a/src/models/Component.jl b/src/models/Component.jl
index 2a8acf1..6b5c869 100644
--- a/src/models/Component.jl
+++ b/src/models/Component.jl
@@ -52,42 +52,14 @@ mutable struct Component{N}
         new{L}(position_params, composition_params, n_samples, 1.0, confidence, shape_prior, Dict{Int, Int}(), guid)
 end
 
-# # This function would be useful when processing QC score as another kind of confidences
-# function maximize!(c::Component{N} where N, pos_data::T1 where T1 <: AbstractMatrix{Float64}, comp_data::T2 where T2 <: Union{AbstractVector{Int}, AbstractVector{Union{Int, Missing}}},
-#         conf_data::T3 where T3 <: AbstractVector{Float64}; nuclei_probs::Union{<:AbstractVector{Float64}, Nothing}=nothing, min_nuclei_frac::Float64=0.1)
-#     c.n_samples = size(pos_data, 2) # TODO: need to replace it with confidences, but for that I need to re-write all prior segmentation code to work with confidences as well. Also may need to adjust some other parts
-#     maximize!(c.composition_params, comp_data, conf_data);
-#     if nuclei_probs === nothing
-#         maximize!(c.position_params, pos_data, conf_data);
-#     else
-#         maximize!(c.position_params, pos_data, conf_data .* nuclei_probs);
-#         if length(nuclei_probs) > 1
-#             c.confidence = quantile(nuclei_probs, 1 - min_nuclei_frac)
-#         end
-#     end
-
-#     if c.shape_prior !== nothing
-#         try
-#             adjust_cov_by_prior!(c.position_params.Σ, c.shape_prior; n_samples=sum(conf_data))
-#         catch
-#             @show c.position_params.Σ
-#             @show c.shape_prior
-#             @show sum(conf_data)
-#             rethrow()
-#         end
-#     end
-
-#     return c
-# end
-
 function maximize!(c::Component{N} where N, pos_data::T1 where T1 <: AbstractMatrix{Float64}, comp_data::T2 where T2 <: Union{AbstractVector{Int}, AbstractVector{Union{Int, Missing}}};
         nuclei_probs::Union{<:AbstractVector{Float64}, Nothing}=nothing, min_nuclei_frac::Float64=0.1)
     c.n_samples = size(pos_data, 2)
-    maximize!(c.composition_params, comp_data);
+    maximize!(c.composition_params, comp_data)
     if nuclei_probs === nothing
-        maximize!(c.position_params, pos_data);
+        maximize!(c.position_params, pos_data)
     else
-        maximize!(c.position_params, pos_data, nuclei_probs);
+        maximize!(c.position_params, pos_data; center_probs=nuclei_probs)
         if length(nuclei_probs) > 1
             c.confidence = quantile(nuclei_probs, 1 - min_nuclei_frac)
         end
diff --git a/test/runtests.jl b/test/runtests.jl
index 9a63aa7..8ec0f32 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -158,7 +158,7 @@ end
             is_not_nan = map(1:10000) do i
                 n_samps = rand(0:100)
                 d = B.MvNormalF([0., 0.], diagm(0 => ones(2)));
-                B.maximize!(d, rand(2, n_samps), rand(n_samps) .* rand(Binomial(1, 0.5), n_samps))
+                B.maximize!(d, rand(2, n_samps); center_probs=rand(n_samps) .* rand(Binomial(1, 0.5), n_samps))
                 !isnan(d.Σ[1])
             end
 

From 4e7214f344bdb56ffc30545159d5ab90812dfe44 Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Wed, 30 Jun 2021 12:08:32 +0200
Subject: [PATCH 08/12] Renamed zero_comp_weight to bg_*; updated Vega
 dependencies

- Allow bg_comp_weight to be 0.
- Vega dependencies are needed for transferable binaries
---
 Manifest.toml                      | 24 ++++++++++++++----------
 src/bmm_algorithm/bmm_algorithm.jl | 18 +++++++++---------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index d09abcc..59f4d18 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -343,9 +343,9 @@ version = "1.4.1"
 
 [[FFTW_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "5a0d4b6a22a34d17d53543bd124f4b08ed78e8b0"
+git-tree-sha1 = "3676abafff7e4ff07bbd2c42b3d8201f31653dcc"
 uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a"
-version = "3.3.9+7"
+version = "3.3.9+8"
 
 [[FileIO]]
 deps = ["Pkg", "Requires", "UUIDs"]
@@ -739,9 +739,9 @@ version = "1.42.0+0"
 
 [[Libiconv_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
-git-tree-sha1 = "8d22e127ea9a0917bc98ebd3755c8bd31989381e"
+git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778"
 uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531"
-version = "1.16.1+0"
+version = "1.16.1+1"
 
 [[Libmount_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -800,9 +800,9 @@ version = "0.10.1"
 
 [[MKL_jll]]
 deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"]
-git-tree-sha1 = "c253236b0ed414624b083e6b72bfe891fbd2c7af"
+git-tree-sha1 = "5455aef09b40e5020e1520f551fa3135040d4ed0"
 uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7"
-version = "2021.1.1+1"
+version = "2021.1.1+2"
 
 [[MacroTools]]
 deps = ["Markdown", "Random"]
@@ -1319,15 +1319,19 @@ version = "0.4.1"
 
 [[Vega]]
 deps = ["DataStructures", "DataValues", "Dates", "FileIO", "FilePaths", "IteratorInterfaceExtensions", "JSON", "JSONSchema", "MacroTools", "NodeJS", "Pkg", "REPL", "Random", "Setfield", "TableTraits", "TableTraitsUtils", "URIParser"]
-git-tree-sha1 = "43f83d3119a868874d18da6bca0f4b5b6aae53f7"
+git-tree-sha1 = "ee32dec286e8e946b76475a6a685e2381ae6c47a"
+repo-rev = "master"
+repo-url = "https://github.com/VPetukhov/Vega.jl"
 uuid = "239c3e63-733f-47ad-beb7-a12fde22c578"
-version = "2.3.0"
+version = "2.3.1-DEV"
 
 [[VegaLite]]
 deps = ["Base64", "DataStructures", "DataValues", "Dates", "FileIO", "FilePaths", "IteratorInterfaceExtensions", "JSON", "MacroTools", "NodeJS", "Pkg", "REPL", "Random", "TableTraits", "TableTraitsUtils", "URIParser", "Vega"]
-git-tree-sha1 = "3e23f28af36da21bfb4acef08b144f92ad205660"
+git-tree-sha1 = "6b9171bca7e92c4f506a375746f0cbcc1c2da744"
+repo-rev = "master"
+repo-url = "https://github.com/queryverse/VegaLite.jl.git"
 uuid = "112f6efa-9a02-5b7d-90c0-432ed331239a"
-version = "2.6.0"
+version = "2.6.1-DEV"
 
 [[VoronoiDelaunay]]
 deps = ["Colors", "GeometricalPredicates", "Random"]
diff --git a/src/bmm_algorithm/bmm_algorithm.jl b/src/bmm_algorithm/bmm_algorithm.jl
index 44da9a2..6bd0735 100644
--- a/src/bmm_algorithm/bmm_algorithm.jl
+++ b/src/bmm_algorithm/bmm_algorithm.jl
@@ -45,14 +45,14 @@ end
     empty!(component_weights)
     empty!(comp_weights)
     empty!(comp_ids)
-    zero_comp_weight = 0.0
+    bg_comp_weight = 0.0
 
     @inbounds @simd for i in 1:length(adjacent_weights)
         c_point = adjacent_points[i]
         c_id = assignment[c_point]
         cw = adjacent_weights[i]
         if c_id == 0
-            zero_comp_weight += cw
+            bg_comp_weight += cw
         else
             component_weights[c_id] = get(component_weights, c_id, 0.0) + cw
         end
@@ -63,13 +63,13 @@ end
         push!(comp_weights, v)
     end
 
-    return zero_comp_weight
+    return bg_comp_weight
 end
 
 @inline function fill_adjacent_component_weights!(adj_classes::Vector{Int}, adj_weights::Vector{Float64}, data::BmmData, mol_id::Int;
         component_weights::Dict{Int, Float64}, adj_classes_global::Dict{Int, Vector{Int}})
     # Looks like it's impossible to optimize further, even with vectorization. It means that creating vectorized version of expect_dirichlet_spatial makes few sense
-    zero_comp_weight = aggregate_adjacent_component_weights!(adj_classes, adj_weights, component_weights, data.assignment,
+    bg_comp_weight = aggregate_adjacent_component_weights!(adj_classes, adj_weights, component_weights, data.assignment,
         data.adjacent_points[mol_id], data.adjacent_weights[mol_id])
 
     if mol_id in keys(adj_classes_global)
@@ -78,7 +78,7 @@ end
         append!(adj_weights, ones(length(adj_classes) - n1) .* data.real_edge_weight)
     end
 
-    return zero_comp_weight
+    return bg_comp_weight
 end
 
 function adjust_densities_by_prior_segmentation!(denses::Vector{Float64}, segment_id::Int, largest_cell_id::Int,
@@ -105,7 +105,7 @@ function adjust_densities_by_prior_segmentation!(denses::Vector{Float64}, segmen
 end
 
 function expect_density_for_molecule!(denses::Vector{Float64}, data::BmmData{N}, mol_id::Int;
-        zero_comp_weight::Float64, adj_classes::Vector{Int}, adj_weights::Vector{Float64}) where N
+        bg_comp_weight::Float64, adj_classes::Vector{Int}, adj_weights::Vector{Float64}) where N
     x = SVector{N}(position_data(data)[:,mol_id]...)
     gene::Union{Int, Missing} = composition_data(data)[mol_id]
     confidence::Float64 = data.confidence[mol_id]
@@ -143,7 +143,7 @@ function expect_density_for_molecule!(denses::Vector{Float64}, data::BmmData{N},
     end
 
     if confidence < 1.0
-        push!(denses, (1 - confidence) * exp(data.mrf_strength * fmax(data.real_edge_weight, zero_comp_weight)) * data.noise_density)
+        push!(denses, (1 - confidence) * exp(data.mrf_strength * bg_comp_weight) * data.noise_density)
         push!(adj_classes, 0)
     end
 
@@ -159,10 +159,10 @@ function expect_dirichlet_spatial!(data::BmmData; stochastic::Bool=true)
     adj_classes_global = get_global_adjacent_classes(data)
 
     for i in 1:size(data.x, 1)
-        zero_comp_weight = fill_adjacent_component_weights!(adj_classes, adj_weights, data, i;
+        bg_comp_weight = fill_adjacent_component_weights!(adj_classes, adj_weights, data, i;
             component_weights=component_weights, adj_classes_global=adj_classes_global)
 
-        expect_density_for_molecule!(denses, data, i; adj_classes=adj_classes, adj_weights=adj_weights, zero_comp_weight=zero_comp_weight)
+        expect_density_for_molecule!(denses, data, i; adj_classes=adj_classes, adj_weights=adj_weights, bg_comp_weight=bg_comp_weight)
 
         assign_molecule!(data, i; denses=denses, adj_classes=adj_classes, stochastic=stochastic)
     end

From eafdb92a14279a7cd9e819c6c5eb1cbd980fa9d3 Mon Sep 17 00:00:00 2001
From: latticetower <merlettaia@gmail.com>
Date: Tue, 30 Nov 2021 19:11:23 +0100
Subject: [PATCH 09/12] add flag to avoid computation of neighborhood
 composition vectors

---
 src/cli/main.jl | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/src/cli/main.jl b/src/cli/main.jl
index 81d8a29..be368ea 100644
--- a/src/cli/main.jl
+++ b/src/cli/main.jl
@@ -65,6 +65,10 @@ function parse_commandline(args::Union{Nothing, Array{String, 1}}=nothing) # TOD
             arg_type = Float64
             default = 0.2
 
+        "--estimate-ncvs"
+            help = "Estimate neighborhood composition vectors"
+            action = :store_true
+
         "coordinates"
             help = "CSV file with coordinates of molecules and gene type"
             required = true
@@ -389,9 +393,11 @@ function save_segmentation_results(bm_data::BmmData, gene_names::Vector{String},
     segmentated_df = get_segmentation_df(bm_data, gene_names)
     cell_stat_df = get_cell_stat_df(bm_data, segmentated_df; add_qc=true)
 
-    @info "Estimating local colors"
-    gene_colors = gene_composition_colors(bm_data.x, args["gene-composition-neigborhood"])
-    segmentated_df[!, :ncv_color] = "#" .* Colors.hex.(gene_colors)
+    if args["estimate-ncvs"]
+        @info "Estimating local colors"
+        gene_colors = gene_composition_colors(bm_data.x, args["gene-composition-neigborhood"])
+        segmentated_df[!, :ncv_color] = "#" .* Colors.hex.(gene_colors)
+    end
 
     @info "Saving results to $(args["output"])"
     CSV.write(args["output"], segmentated_df[sortperm(segmentated_df.molecule_id), :]);
@@ -403,14 +409,16 @@ function save_segmentation_results(bm_data::BmmData, gene_names::Vector{String},
 
     if args["plot"]
         plot_diagnostics_panel(segmentated_df, segmentated_df.cell, bm_data.tracer, args; clust_res=mol_clusts, comp_segs=comp_segs)
-        polygons = plot_transcript_assignment_panel(bm_data.x, bm_data.assignment, args; clusters=bm_data.cluster_per_molecule, prior_polygons=prior_polygons,
-            gene_colors=gene_colors)
-
-        if args["save-polygons"] !== nothing
-            if lowercase(args["save-polygons"]) == "geojson"
-                save_polygons_to_geojson(polygons, append_suffix(args["output"], "polygons.json"))
-            else
-                @warn "Unknown polygon format: $(args["save-polygons"])"
+        if args["estimate-ncvs"]
+            polygons = plot_transcript_assignment_panel(bm_data.x, bm_data.assignment, args; clusters=bm_data.cluster_per_molecule, prior_polygons=prior_polygons,
+                gene_colors=gene_colors)
+
+            if args["save-polygons"] !== nothing
+                if lowercase(args["save-polygons"]) == "geojson"
+                    save_polygons_to_geojson(polygons, append_suffix(args["output"], "polygons.json"))
+                else
+                    @warn "Unknown polygon format: $(args["save-polygons"])"
+                end
             end
         end
     end

From bd9af08cb7c8e0565954264478cd068e251357b3 Mon Sep 17 00:00:00 2001
From: latticetower <merlettaia@gmail.com>
Date: Tue, 30 Nov 2021 20:39:42 +0100
Subject: [PATCH 10/12] fix estimate-ncvs to be turned on by default

---
 src/cli/main.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/cli/main.jl b/src/cli/main.jl
index be368ea..52e4080 100644
--- a/src/cli/main.jl
+++ b/src/cli/main.jl
@@ -65,9 +65,10 @@ function parse_commandline(args::Union{Nothing, Array{String, 1}}=nothing) # TOD
             arg_type = Float64
             default = 0.2
 
-        "--estimate-ncvs"
-            help = "Estimate neighborhood composition vectors"
-            action = :store_true
+        "--no-ncvs-estimation"
+            dest_name = "estimate-ncvs"
+            help = "Turns off neighborhood composition vectors estimation"
+            action = :store_false
 
         "coordinates"
             help = "CSV file with coordinates of molecules and gene type"

From 555e77d8fe9078e48d31c89cde778f5cccca2b7e Mon Sep 17 00:00:00 2001
From: latticetower <merlettaia@gmail.com>
Date: Tue, 30 Nov 2021 22:03:57 +0100
Subject: [PATCH 11/12] fix naming for parameter: no-ncvs-estimation ->
 no-ncv-estimation

---
 src/cli/main.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cli/main.jl b/src/cli/main.jl
index 52e4080..20b8f4a 100644
--- a/src/cli/main.jl
+++ b/src/cli/main.jl
@@ -65,7 +65,7 @@ function parse_commandline(args::Union{Nothing, Array{String, 1}}=nothing) # TOD
             arg_type = Float64
             default = 0.2
 
-        "--no-ncvs-estimation"
+        "--no-ncv-estimation"
             dest_name = "estimate-ncvs"
             help = "Turns off neighborhood composition vectors estimation"
             action = :store_false

From 61a0f51c1c0a606c8d67715bf2b4796f61082ef3 Mon Sep 17 00:00:00 2001
From: Viktor Petukhov <viktor.s.petuhov@ya.ru>
Date: Wed, 1 Dec 2021 18:01:07 +0100
Subject: [PATCH 12/12] Updated CHANGELOG

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7a1ec73..8f584d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,10 @@
 - Slightly optimized compilation time
 - Minor updates of core formulas
 
+### Added
+
+- CLI parameter `no-ncv-estimation` to disable estimation of NCVs
+
 ## [0.5.0] — 2021-06-03
 
 ### Changed