diff --git a/docs/Project.toml b/docs/Project.toml index 9e220ec..0d59a1d 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,14 +1,15 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RedClust = "bf1adee6-87fe-4679-8d23-51fe99940a25" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" -LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" [compat] Documenter = "0.27" +Literate = "0.3" StatsBase = "0.33.8" -julia = "1.7" \ No newline at end of file +julia = "1.7" diff --git a/docs/make.jl b/docs/make.jl index 872b618..41c6199 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -37,11 +37,13 @@ function preprocess_jl(content) end return content end + # For local builds, we gitignore folder names starting with and underscore. On GH-pages Jekyll ignores folders starting with an underscore, which we want to avoid. Therefore we place files generated by Literate.jl into a folder whose name depends on whether we have a local build. genfolder = local_build ? "_generated" : "generated" inputdir = joinpath(@__DIR__, "..", "examples") inputfile = joinpath(inputdir, "basic_example.jl") outputdir = joinpath(@__DIR__, "src", genfolder) + # Create the example file in the docs Literate.markdown(inputfile, outputdir; name = "example", preprocess = preprocess_md) diff --git a/examples/basic_example.jl b/examples/basic_example.jl index 7b28e1f..d3ae6f5 100644 --- a/examples/basic_example.jl +++ b/examples/basic_example.jl @@ -31,10 +31,12 @@ gridlinewidth = 1, framestyle = :box, linecolor = :match, linewidth = 0.5, -guidefontsize = 16, -tickfontsize = 16, -colorbar_tickfontsize = 16, -legend_font_pointsize = 16) +guidefontsize = 14, +tickfontsize = 12, +colorbar_tickfontsize = 12, +legend_font_pointsize = 12, +plot_titlefontsize = 14 +) # seed the default RNG so that documentation remains stable seed!(44) @@ -114,7 +116,7 @@ sqmatrixplot(adjacencymatrix(clusts), title = "Adjacency Matrix") # We can visualise the oracle co-clustering matrix. This matrix is the matrix of co-clustering probabilities of the observations conditioned upon the data generation process. This takes into account full information about the cluster weights (and how they are generated), the mixture kernels for each cluster, and the location and scale parameters for these kernels. sqmatrixplot(oracle_coclustering, title = "Oracle Coclustering Probabilities") # We can visualise the distance matrix of the observations. -sqmatrixplot(distmatrix) +sqmatrixplot(distmatrix, title = "Matrix of Pairwise Distances") # We can also plot the histogram of distances, grouped by whether they are inter-cluster distances (ICD) or within-cluster distances (WCD). begin empirical_intracluster = uppertriangle(distmatrix)[ @@ -132,23 +134,23 @@ end #md # ## Prior Hyperparameters # RedClust includes the function [`fitprior`](@ref) to heuristically choose prior hyperparameters based on the data. params = fitprior(points, "k-means", false) -# We can check how good the chosen prior hyperparameters are by comparing the empirical distribution of distances to the predictive distribution based on the prior. +# We can check how good the chosen prior hyperparameters are by comparing the empirical distribution of distances to the (marginal) prior predictive distribution. begin pred_intracluster = sampledist(params, "intracluster", 10000) pred_intercluster = sampledist(params, "intercluster", 10000) density(pred_intracluster, label="Simulated WCD", xlabel = "Distance", ylabel = "Density", - size = (700, 500), linewidth = 2, linestyle = :dash) density!(empirical_intracluster, label="Empirical WCD", - linewidth = 2, primary = false) + color = 1, linewidth = 2) density!(pred_intercluster, label="Simulated ICD", - linewidth = 2, linestyle = :dash) + linewidth = 2, linestyle = :dash, color = 2) density!(empirical_intercluster, label="Empirical ICD", - linewidth = 2, primary = false) + linewidth = 2, color = 2) + title!("Distances: Prior Predictive vs Empirical Distribution") end # We can also evaluate the prior hyperparameters by checking the marginal predictive distribution on ``K`` (the number of clusters). begin @@ -175,8 +177,8 @@ result = runsampler(data, options, params) sqmatrixplot(combine_sqmatrices(result.posterior_coclustering, oracle_coclustering), title="Posterior vs Oracle Coclustering Probabilities") # Plot the posterior distribution of K: -histogram_pmf(result.K, xlabel = "K", ylabel = "PMF", -size = (400, 400), title = "Posterior Distribution of K") +histogram_pmf(result.K, +xlabel = "K", ylabel = "PMF", title = "Posterior Distribution of K") # Plot the posterior distribution of r: begin histogram(result.r, normalize = :pdf,