Skip to content

Commit

Permalink
edit basic example
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinavnatarajan committed Dec 9, 2022
1 parent 3c26669 commit 63ef519
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 16 deletions.
9 changes: 5 additions & 4 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
RedClust = "bf1adee6-87fe-4679-8d23-51fe99940a25"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"

[compat]
Documenter = "0.27"
Literate = "0.3"
StatsBase = "0.33.8"
julia = "1.7"
julia = "1.7"
2 changes: 2 additions & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,13 @@ function preprocess_jl(content)
end
return content
end

# For local builds, we gitignore folder names starting with an underscore. On GH-pages, Jekyll ignores folders starting with an underscore, which we want to avoid. Therefore we place files generated by Literate.jl into a folder whose name depends on whether we have a local build.
# Output folder name for Literate.jl: "_generated" when `local_build` is true, "generated" otherwise.
# NOTE(review): `local_build` is defined outside this excerpt — confirm how it is set.
genfolder = local_build ? "_generated" : "generated"
# The example scripts live in the `examples` folder in the parent of this file's directory.
inputdir = joinpath(@__DIR__, "..", "examples")
inputfile = joinpath(inputdir, "basic_example.jl")
# Generated output is placed inside the `src` folder that sits alongside this file.
outputdir = joinpath(@__DIR__, "src", genfolder)

# Create the example file in the docs
# `preprocess_md` is defined outside this excerpt; presumably the output is named after
# `name` (i.e. "example") — verify against the Literate.jl documentation.
Literate.markdown(inputfile, outputdir;
name = "example", preprocess = preprocess_md)
Expand Down
26 changes: 14 additions & 12 deletions examples/basic_example.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ gridlinewidth = 1,
framestyle = :box,
linecolor = :match,
linewidth = 0.5,
guidefontsize = 16,
tickfontsize = 16,
colorbar_tickfontsize = 16,
legend_font_pointsize = 16)
guidefontsize = 14,
tickfontsize = 12,
colorbar_tickfontsize = 12,
legend_font_pointsize = 12,
plot_titlefontsize = 14
)

# seed the default RNG so that documentation remains stable
seed!(44)
Expand Down Expand Up @@ -114,7 +116,7 @@ sqmatrixplot(adjacencymatrix(clusts), title = "Adjacency Matrix")
# We can visualise the oracle co-clustering matrix. This matrix is the matrix of co-clustering probabilities of the observations conditioned upon the data generation process. This takes into account full information about the cluster weights (and how they are generated), the mixture kernels for each cluster, and the location and scale parameters for these kernels.
sqmatrixplot(oracle_coclustering, title = "Oracle Coclustering Probabilities")
# We can visualise the distance matrix of the observations.
sqmatrixplot(distmatrix)
sqmatrixplot(distmatrix, title = "Matrix of Pairwise Distances")
# We can also plot the histogram of distances, grouped by whether they are inter-cluster distances (ICD) or within-cluster distances (WCD).
begin
empirical_intracluster = uppertriangle(distmatrix)[
Expand All @@ -132,23 +134,23 @@ end
#md # ## Prior Hyperparameters
# RedClust includes the function [`fitprior`](@ref) to heuristically choose prior hyperparameters based on the data.
params = fitprior(points, "k-means", false)
# We can check how good the chosen prior hyperparameters are by comparing the empirical distribution of distances to the predictive distribution based on the prior.
# We can check how good the chosen prior hyperparameters are by comparing the empirical distribution of distances to the (marginal) prior predictive distribution.
begin
pred_intracluster = sampledist(params, "intracluster", 10000)
pred_intercluster = sampledist(params, "intercluster", 10000)
density(pred_intracluster,
label="Simulated WCD", xlabel = "Distance", ylabel = "Density",
size = (700, 500),
linewidth = 2, linestyle = :dash)
density!(empirical_intracluster,
label="Empirical WCD",
linewidth = 2, primary = false)
color = 1, linewidth = 2)
density!(pred_intercluster,
label="Simulated ICD",
linewidth = 2, linestyle = :dash)
linewidth = 2, linestyle = :dash, color = 2)
density!(empirical_intercluster,
label="Empirical ICD",
linewidth = 2, primary = false)
linewidth = 2, color = 2)
title!("Distances: Prior Predictive vs Empirical Distribution")
end
# We can also evaluate the prior hyperparameters by checking the marginal predictive distribution on ``K`` (the number of clusters).
begin
Expand All @@ -175,8 +177,8 @@ result = runsampler(data, options, params)
sqmatrixplot(combine_sqmatrices(result.posterior_coclustering, oracle_coclustering),
title="Posterior vs Oracle Coclustering Probabilities")
# Plot the posterior distribution of K:
histogram_pmf(result.K, xlabel = "K", ylabel = "PMF",
size = (400, 400), title = "Posterior Distribution of K")
histogram_pmf(result.K,
xlabel = "K", ylabel = "PMF", title = "Posterior Distribution of K")
# Plot the posterior distribution of r:
begin
histogram(result.r, normalize = :pdf,
Expand Down

0 comments on commit 63ef519

Please sign in to comment.