edit basic example

abhinavnatarajan · Dec 9, 2022 · 63ef519 · 63ef519
1 parent 3c26669
commit 63ef519
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 16 deletions.
diff --git a/docs/Project.toml b/docs/Project.toml
@@ -1,14 +1,15 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 RedClust = "bf1adee6-87fe-4679-8d23-51fe99940a25"
-StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd"
 
 [compat]
 Documenter = "0.27"
+Literate = "0.3"
 StatsBase = "0.33.8"
-julia = "1.7"
+julia = "1.7"
diff --git a/docs/make.jl b/docs/make.jl
@@ -37,11 +37,13 @@ function preprocess_jl(content)
     end
     return content
 end
+
 # For local builds, we gitignore folder names starting with an underscore. On GH-pages Jekyll ignores folders starting with an underscore, which we want to avoid. Therefore we place files generated by Literate.jl into a folder whose name depends on whether we have a local build. 
 genfolder = local_build ? "_generated" : "generated" 
 inputdir = joinpath(@__DIR__, "..", "examples")
 inputfile = joinpath(inputdir, "basic_example.jl")
 outputdir = joinpath(@__DIR__, "src", genfolder)
+
 # Create the example file in the docs
 Literate.markdown(inputfile, outputdir; 
 name = "example", preprocess = preprocess_md)

diff --git a/examples/basic_example.jl b/examples/basic_example.jl
@@ -31,10 +31,12 @@ gridlinewidth = 1,
 framestyle = :box,
 linecolor = :match,
 linewidth = 0.5,
-guidefontsize = 16, 
-tickfontsize = 16, 
-colorbar_tickfontsize = 16, 
-legend_font_pointsize = 16)
+guidefontsize = 14, 
+tickfontsize = 12, 
+colorbar_tickfontsize = 12, 
+legend_font_pointsize = 12, 
+plot_titlefontsize = 14
+)
 
 # seed the default RNG so that documentation remains stable 
 seed!(44)
@@ -114,7 +116,7 @@ sqmatrixplot(adjacencymatrix(clusts), title = "Adjacency Matrix")
 # We can visualise the oracle co-clustering matrix. This matrix is the matrix of co-clustering probabilities of the observations conditioned upon the data generation process. This takes into account full information about the cluster weights (and how they are generated), the mixture kernels for each cluster, and the location and scale parameters for these kernels. 
 sqmatrixplot(oracle_coclustering, title = "Oracle Coclustering Probabilities")
 # We can visualise the distance matrix of the observations.
-sqmatrixplot(distmatrix)
+sqmatrixplot(distmatrix, title = "Matrix of Pairwise Distances")
 # We can also plot the histogram of distances, grouped by whether they are inter-cluster distances (ICD) or within-cluster distances (WCD).
 begin 
     empirical_intracluster = uppertriangle(distmatrix)[
@@ -132,23 +134,23 @@ end
 #md # ## Prior Hyperparameters
 # RedClust includes the function [`fitprior`](@ref) to heuristically choose prior hyperparameters based on the data.
 params = fitprior(points, "k-means", false)
-# We can check how good the chosen prior hyperparameters are by comparing the empirical distribution of distances to the predictive distribution based on the prior. 
+# We can check how good the chosen prior hyperparameters are by comparing the empirical distribution of distances to the (marginal) prior predictive distribution.
 begin 
     pred_intracluster = sampledist(params, "intracluster", 10000)
     pred_intercluster = sampledist(params, "intercluster", 10000)
     density(pred_intracluster, 
     label="Simulated WCD", xlabel = "Distance", ylabel = "Density", 
-    size = (700, 500), 
     linewidth = 2, linestyle = :dash)
     density!(empirical_intracluster, 
     label="Empirical WCD", 
-    linewidth = 2, primary = false)
+    color = 1, linewidth = 2)
     density!(pred_intercluster, 
     label="Simulated ICD", 
-    linewidth = 2, linestyle = :dash)
+    linewidth = 2, linestyle = :dash, color = 2)
     density!(empirical_intercluster, 
     label="Empirical ICD", 
-    linewidth = 2, primary = false)
+    linewidth = 2, color = 2)
+    title!("Distances: Prior Predictive vs Empirical Distribution")
 end
 # We can also evaluate the prior hyperparameters by checking the marginal predictive distribution on ``K`` (the number of clusters). 
 begin
@@ -175,8 +177,8 @@ result = runsampler(data, options, params)
 sqmatrixplot(combine_sqmatrices(result.posterior_coclustering, oracle_coclustering), 
 title="Posterior vs Oracle Coclustering Probabilities")
 # Plot the posterior distribution of K:
-histogram_pmf(result.K, xlabel = "K", ylabel = "PMF", 
-size = (400, 400), title = "Posterior Distribution of K")
+histogram_pmf(result.K, 
+xlabel = "K", ylabel = "PMF", title = "Posterior Distribution of K")
 # Plot the posterior distribution of r:
 begin
     histogram(result.r, normalize = :pdf,