From b5f70aece07571d8891ecf1be4273e877c017733 Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Mon, 15 Apr 2024 09:54:01 +0100 Subject: [PATCH] [WIP] Update fsdocs and move tutorials into docs --- .config/dotnet-tools.json | 12 +- build.fsx | 8 +- .../components => components}/allometric.fsx | 0 docs/_template.ipynb | 0 docs/confidence-intervals.fsx | 30 +- docs/data.fsx | 4 +- docs/dendro.fsx | 4 +- docs/diagnostics.fsx | 10 +- docs/estimation-engine.fsx | 51 +- {samples => docs/examples}/data/lynx-hare.csv | 0 docs/examples/plant-temperature.fsx | 173 +++++++ docs/examples/predator-prey.fsx | 220 ++++++++ docs/examples/shrub-resource.fsx | 480 ++++++++++++++++++ docs/examples/tutorial.fsx | 209 ++++++++ docs/index.fsx | 19 +- docs/integration.fsx | 4 +- docs/language.fsx | 4 +- docs/logging.fsx | 4 +- docs/model-fitting.fsx | 4 +- docs/model-selection.fsx | 4 +- docs/model.fsx | 9 +- docs/optimisation.fsx | 39 +- docs/tutorial.fsx | 209 -------- docs/workflow.fsx | 22 +- samples/1-population-resource.fsx | 82 --- samples/2-external-environment.fsx | 133 ----- samples/3-shrub-nitrogen.fsx | 462 ----------------- 27 files changed, 1222 insertions(+), 974 deletions(-) rename {samples/components => components}/allometric.fsx (100%) create mode 100644 docs/_template.ipynb rename {samples => docs/examples}/data/lynx-hare.csv (100%) create mode 100644 docs/examples/plant-temperature.fsx create mode 100644 docs/examples/predator-prey.fsx create mode 100644 docs/examples/shrub-resource.fsx create mode 100644 docs/examples/tutorial.fsx delete mode 100644 docs/tutorial.fsx delete mode 100755 samples/1-population-resource.fsx delete mode 100644 samples/2-external-environment.fsx delete mode 100644 samples/3-shrub-nitrogen.fsx diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json index 3e48e33..acb85e0 100644 --- a/.config/dotnet-tools.json +++ b/.config/dotnet-tools.json @@ -14,17 +14,17 @@ "paket" ] }, - "fsharp.formatting.commandtool": { - "version": "11.4.3", - "commands": [ - "fsdocs" - ] - }, "fantomas": { "version": "6.3.1", "commands": [ "fantomas" ] + }, + "fsdocs-tool": { + "version": "20.0.0", + "commands": [ + "fsdocs" + ] } } } \ No newline at end of file diff --git a/build.fsx b/build.fsx index 4471b1d..2d36f8d 100644 --- a/build.fsx +++ b/build.fsx @@ -157,9 +157,9 @@ Target.create "DocsMeta" (fun _ -> sprintf "%s/master/docs/content/logo.png" repositoryContentUrl sprintf "%s" tags sprintf "%s" release.NugetVersion - sprintf "%s/master/docs/img/logo.png" repositoryContentUrl - sprintf "%s/blob/master/LICENSE.md" repositoryUrl - sprintf "%s/blob/master/RELEASE_NOTES.md" repositoryUrl + sprintf "/img/logo.png" + sprintf "%s/blob/master/LICENSE" repositoryUrl + sprintf "%s/blob/master/RELEASE_NOTES.MD" repositoryUrl "true" "default" "" @@ -169,7 +169,7 @@ Target.create "DocsMeta" (fun _ -> Target.create "GenerateDocs" (fun _ -> Fake.IO.Shell.cleanDir ".fsdocs" - DotNet.exec id "fsdocs" "build --clean" |> ignore + DotNet.exec id "fsdocs" "build --clean --eval --strict" |> ignore ) // -------------------------------------------------------------------------------------- diff --git a/samples/components/allometric.fsx b/components/allometric.fsx similarity index 100% rename from samples/components/allometric.fsx rename to components/allometric.fsx diff --git a/docs/_template.ipynb b/docs/_template.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/docs/confidence-intervals.fsx b/docs/confidence-intervals.fsx index a043042..5aa2a80 100644 --- a/docs/confidence-intervals.fsx +++ b/docs/confidence-intervals.fsx @@ -12,11 +12,11 @@ index: 5 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** @@ -37,20 +37,20 @@ open Bristlecone open Bristlecone.Optimisation open Bristlecone.Data -// fitFn = -fun engine dataset hypothesis result -> +// // fitFn = +// fun engine dataset hypothesis result -> - // The function used to fit the model, which unless an - // advanced scenario is usually Bristlecone.fit - let fitFn = Bristlecone.fit +// // The function used to fit the model, which unless an +// // advanced scenario is usually Bristlecone.fit +// let fitFn = Bristlecone.fit - // The number of jumps to perform in parameter space - let n = 10000 +// // The number of jumps to perform in parameter space +// let n = 10000 - let ci = ConfidenceInterval.ProfileLikelihood.profile fitFn engine dataset hypothesis n result +// let ci = ConfidenceInterval.ProfileLikelihood.profile fitFn engine dataset hypothesis n result - // Optionally, save the result - let saveDir = "/some/save/dir" - let subjectName = "some subject" - let modelId = "some model hypothesis" - Confidence.save saveDir subjectName modelId result.ResultId ci \ No newline at end of file +// // Optionally, save the result +// let saveDir = "/some/save/dir" +// let subjectName = "some subject" +// let modelId = "some model hypothesis" +// Confidence.save saveDir subjectName modelId result.ResultId ci \ No newline at end of file diff --git a/docs/data.fsx b/docs/data.fsx index bf1a9a1..e3ccf67 100644 --- a/docs/data.fsx +++ b/docs/data.fsx @@ -12,11 +12,11 @@ index: 1 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/dendro.fsx b/docs/dendro.fsx index 4fb7cc5..8855a72 100644 --- a/docs/dendro.fsx +++ b/docs/dendro.fsx @@ -11,11 +11,11 @@ index: 1 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/diagnostics.fsx b/docs/diagnostics.fsx index dea6cce..e92dea9 100644 --- a/docs/diagnostics.fsx +++ b/docs/diagnostics.fsx @@ -12,11 +12,11 @@ index: 5 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** @@ -69,9 +69,9 @@ of the `IComponentLogger)` type: [ NB TODO: This function must be refactored to work with the new Bristlecone Language ] *) -open Bristlecone -open Bristlecone.Language -open Bristlecone.Diagnostics.ModelComponents +// open Bristlecone +// open Bristlecone.Language +// open Bristlecone.Diagnostics.ModelComponents // let hypothesis (cLog:IComponentLogger) = diff --git a/docs/estimation-engine.fsx b/docs/estimation-engine.fsx index 1e13132..707d404 100644 --- a/docs/estimation-engine.fsx +++ b/docs/estimation-engine.fsx @@ -12,11 +12,11 @@ index: 3 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** @@ -31,4 +31,49 @@ will suffice for all model hypotheses. Forthcoming -*) \ No newline at end of file +*) + + +(**## Time Modes + +Bristlecone can run models in either discrete time, or continuous time. When running models in continuous time, an integration function is required: +*) + +(** +Currently only fixed timesteps are supported, but variable timestep support (e.g. for sediment core data) is planned. + +Two integration functions are included: + +| Solver | Function | Description | +| - | - | - | +| Runge-Kutta 4 (MathNet Numerics) | ``Integration.MathNet.integrate`` | A fourth-order Runge Kutta method to provide approximate solutions to ODE systems. | +| Runge-Kutta 547M (Open Solving Library for ODEs - Microsoft Research) | ``Integration.MsftOslo.integrate`` | A method based on classic Runge-Kutta, but with automatic error and step size control. [See the documentation](https://www.microsoft.com/en-us/research/wp-content/uploads/2014/07/osloUserGuide.pdf).| + +## Optimisation + +Bristlecone supports optimisation functions that have the following type signature: + +```fsharp +type Optimise<'data> = int -> int -> Domain -> ('data[] -> 'data) -> ('data * 'data []) list +``` + +There are two optimsation techniques currently built-in: + +| Method | Function | Description | +| - | - | - | +| Amoeba (Nelder-Mead) | ``Optimisation.Amoeba.solve`` | A gradient-descent method. | +| MCMC Random Walk | ``Optimisation.MonteCarlo.randomWalk`` | A method based on classic Runge-Kutta, but with automatic error and step size control. [See the documentation](https://www.microsoft.com/en-us/research/wp-content/uploads/2014/07/osloUserGuide.pdf).| + + +## Estimation Engines + +To use Bristlecone functions requires a configured ``EstimationEngine``. The easiest way is with the helper functions within the ``Bristlecone`` module: + +| Function | Type | Description | +| - | - | - | +| ``mkContinuous`` | EstimationEngine | Default continuous engine | +| ``mkDiscrete`` | EstimationEngine | Default discrete model engine | +| ``withContinuousTime`` | t : Integrate<'a,'b> -> engine: EstimationEngine<'a,'b> -> EstimationEngine<'a,'b> | Transforms engine to continuous time mode, using the given integrator function. | +| ``withConditioning`` | c: Conditioning -> engine: EstimationEngine<'a,'b> -> EstimationEngine<'a,'b> | Choose how the start point is chosen when solving the model system. | + +**) \ No newline at end of file diff --git a/samples/data/lynx-hare.csv b/docs/examples/data/lynx-hare.csv similarity index 100% rename from samples/data/lynx-hare.csv rename to docs/examples/data/lynx-hare.csv diff --git a/docs/examples/plant-temperature.fsx b/docs/examples/plant-temperature.fsx new file mode 100644 index 0000000..1be8a77 --- /dev/null +++ b/docs/examples/plant-temperature.fsx @@ -0,0 +1,173 @@ +(** +--- +title: Plant response to temperature +category: Examples +categoryindex: 2 +index: 1 +--- +*) + +(*** condition: prepare ***) +#nowarn "211" +#r "../../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" +#r "nuget: MathNet.Numerics.FSharp,5.0.0" +(*** condition: fsx ***) +#if FSX +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // FSX +(*** condition: ipynb ***) +#if IPYNB +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // IPYNB + +(** +# Growth of tree rings in response to temperature +This Bristlecone example demonstrates model-fitting +using high-resolution monthly temperature data with +annual ring width increments. + +First, we must reference Bristlecone and open it: +*) +open Bristlecone // Opens Bristlecone core library and estimation engine +open Bristlecone.Language // Open the language for writing Bristlecone models +open Bristlecone.Time + +(** +### Step 1. Defining the ecological model + +Here, we + +*) + + +// 1. Model System +// ---------------------------- + +let hypothesis = + + /// The universal gas constant in J mol−1 K−1 + let gasConstant = Constant 8.314 + + /// An Arrhenius function to represent temperature limitation on growth. + /// Form of equation: https://pubag.nal.usda.gov/download/13565/PDF + let temperatureLimitation = + Constant System.Math.E + ** ((Constant 1000. * Parameter "Ea" * (Environment "temperature" - Constant 298.)) + / (Constant 298. * gasConstant * Environment "temperature")) + + /// Plant growth is a function of net photosynthesis minus environmental losses. + /// Photosynthesis is limited by light and temperature. + let ``db/dt`` = This * Parameter "r" * temperatureLimitation - Parameter "γB" * This + + Model.empty + |> Model.addEquation "stem radius" ``db/dt`` + |> Model.estimateParameter "Ea" noConstraints 40.0 50.0 + |> Model.estimateParameter "γB" noConstraints 0.01 0.40 + |> Model.estimateParameter "r" notNegative 0.01 1.00 + |> Model.useLikelihoodFunction (ModelLibrary.Likelihood.sumOfSquares [ "stem radius" ]) + |> Model.compile + + +// 2. Setup Bristlecone Engine +// ---------------------------- +// A bristlecone engine provides a fixed setup for estimating parameters from data. +// Use the same engine for all model fits within a single study. + +let engine = + Bristlecone.mkContinuous + |> Bristlecone.withContinuousTime Integration.MathNet.integrate + |> Bristlecone.withConditioning Conditioning.RepeatFirstDataPoint + |> Bristlecone.withTunedMCMC + [ Optimisation.MonteCarlo.TuneMethod.CovarianceWithScale 0.200, + 250, + Optimisation.EndConditions.afterIteration 20000 ] + + +// 3. Test Engine and Model +// ---------------------------- +// Running a full test is strongly recommended. The test will demonstrate if the current +// configuration can find known parameters for a model. If this step fails, there is an +// issue with either your model, or the Bristlecone configuration. + +// 0. Configure Options +// ---------------------------- + +module Settings = + + /// Bristlecone can use latitude and longitude to compute day length. + let latitude, longitude = 63.2, 15.2 + let endWhen = Optimisation.EndConditions.afterIteration 1 + + +// let startValues = +// [ ShortCode.create "lynx", 30.09; ShortCode.create "hare", 19.58 ] |> Map.ofList + +// // TODO Test settings new format + +// hypothesis +// |> Bristlecone.testModel engine Options.testSeriesLength startValues Options.iterations [] + + +// // 4. Load Real Data +// // ---------------------------- +// // Here, we are using the FSharp.Data type provider to read in .csv datasets. +// // We prepare the monthly temperature dataset for the analysis by: +// // a) Loading the daily dataset using the FSharp.Data library; +// // b) Replacing NaN values with the F# 'None' option type; +// // c) Converting the dataset into a Bristlecone time series type; +// // d) Interpolate the 'None' values by linear interpolation to fill in missing measurements; +// // e) Generalise the time series from daily to monthly by applying an average function. + +// open FSharp.Data + +// // TODO Is there a way to streamline this? +// [] +// let DailyTemperatureUrl = __SOURCE_DIRECTORY__ + "/data/mean-temperature-daily.csv" + +// let meanTemperatureMonthly = +// let maxTemperatures = CsvProvider.Load DailyTemperatureUrl + +// maxTemperatures.Rows +// |> Seq.map (fun r -> +// ((if System.Double.IsNaN r.``T[avg]`` then +// None +// else +// Some(r.``T[avg]`` + 273.15)), +// r.Date)) +// |> TimeSeries.fromObservations +// |> TimeSeries.interpolate +// |> TimeSeries.generalise (FixedTemporalResolution.Months(PositiveInt.create 1)) (fun x -> x |> Seq.averageBy fst) + + +// module Test = + +// open Bristlecone.Test + +// let settings = TestSettings.Default + +// let testSettings = +// { Resolution = Years 1 +// TimeSeriesLength = 30 +// StartValues = [ code "b", 5.; code "t", 255. ] |> Map.ofList +// EndCondition = Settings.endWhen +// GenerationRules = +// [ "b" |> GenerationRules.alwaysLessThan 1000000. +// "b" |> GenerationRules.alwaysMoreThan 0. +// code "b", (fun data -> (data |> Seq.max) - (data |> Seq.min) > 100.) ] +// NoiseGeneration = fun p data -> data +// EnvironmentalData = [ code "t", TemperatureData.monthly ] |> Map.ofList +// Random = MathNet.Numerics.Random.MersenneTwister() +// StartDate = System.DateTime(1970, 01, 01) +// Attempts = 50000 } + +// let run () = +// hypothesis |> Bristlecone.testModel Settings.engine testSettings + + +// let testResult = Test.run () + +// // 4. Fit Model to Real Data +// // ----------------------------------- +// let result = +// hypothesis +// |> Bristlecone.fit engine (Optimisation.EndConditions.afterIteration Options.iterations) data diff --git a/docs/examples/predator-prey.fsx b/docs/examples/predator-prey.fsx new file mode 100644 index 0000000..a8ccc60 --- /dev/null +++ b/docs/examples/predator-prey.fsx @@ -0,0 +1,220 @@ +(** +--- +title: Predator-Prey Dynamics +category: Examples +categoryindex: 1 +index: 1 +--- + +[![Script](img/badge-script.svg)]({{root}}/{{fsdocs-source-basename}}.fsx)  +[![Notebook](img/badge-notebook.svg)]({{root}}/{{fsdocs-source-basename}}.ipynb) +*) + +(*** condition: prepare ***) +#nowarn "211" +#r "nuget:MathNet.Numerics.FSharp,4.15" +#r "../../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" +(*** condition: fsx ***) +#if FSX +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // FSX +(*** condition: ipynb ***) +#if IPYNB +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // IPYNB + +#r "nuget:RProvider" + +(** +Predator-Prey Dynamics: Snowshoe Hare and Lynx +--- + +Here we use the classic example of snowshoe hare and lynx predator-prey dynamics, +to demonstrate the basic functions of Bristlecone. The dataset is a 90-year +time-series of snowshoe hare and lynx pelts purchased by the +Hudson's Bay Company of Canada. Data is in 1000s. + +To get started, we first load and open the Bristlecone library in +an F# script file (.fsx): +*) + +open Bristlecone // Opens Bristlecone core library and estimation engine +open Bristlecone.Language // Open the language for writing Bristlecone models + +(**### Defining the ecological model + +In Bristlecone, a single ecological model (representing a single hypothesis) +is defined through the `ModelSystem` type. A `ModelSystem` needs to include three key +components: + +* **Model equations.** When working in continuous time, these are a system of Ordinary Differential Equations (ODEs). +* **Parameters to be estimated.** You must specify the starting bounds and constraints for every parameter included in the model equations. +* **Likelihood function**. The (negative log) likelihood function *-logL* represents the probability of observing the data given the parameter set. We use a negative log likelihood function, which is then minimised during optimisation. + +In this example, we demonstrate using the *Lotka–Volterra* predator–prey model as the +model system. For the -logL function we use a bivariate normal negative log likelihood function. +This -logL function assumes normally-distributed observation error around each observation +at each time-point, for both the lynx and hare data. The -logL function contains +three parameters that are to be estimated alongside the deterministic model: the variability +in lynx data, the variability in hare data, and their covariance. +*) + +let ``predator-prey`` = + + let ``dh/dt`` = Parameter "α" * This - Parameter "β" * This * Environment "lynx" + let ``dl/dt`` = - Parameter "γ" * This + Parameter "Δ" * Environment "hare" * This + + Model.empty + |> Model.addEquation "hare" ``dh/dt`` + |> Model.addEquation "lynx" ``dl/dt`` + + |> Model.estimateParameter "α" noConstraints 0.01 1.00 // Natural growth rate of hares in absence of predation + |> Model.estimateParameter "β" noConstraints 0.01 1.00 // Death rate per encounter of hares due to predation + |> Model.estimateParameter "Δ" noConstraints 0.01 0.20 // Efficiency of turning predated hares into lynx + |> Model.estimateParameter "γ" noConstraints 0.01 0.20 // Natural death rate of lynx in the absence of food + + |> Model.useLikelihoodFunction (ModelLibrary.Likelihood.sumOfSquares ["hare"; "lynx"]) + |> Model.estimateParameter "ρ" noConstraints -0.500 0.500 + |> Model.estimateParameter "σ[x]" notNegative 0.001 0.100 + |> Model.estimateParameter "σ[y]" notNegative 0.001 0.100 + + |> Model.compile + + // TODO Error when setting constraints. Optim allows them to go negative! + +(** +### Setting up the *Bristlecone Engine* + +A bristlecone engine provides a fixed setup for estimating parameters from data. +Use the same engine for all model fits within a single study. +This engine uses a gradident descent method (Nelder Mead simplex), and a basic +Runge-Kutta 4 integration method provided by MathNet Numerics. +*) + +let engine = + Bristlecone.mkContinuous + |> Bristlecone.withTunedMCMC [] + |> Bristlecone.withContinuousTime Integration.MathNet.integrate + |> Bristlecone.withConditioning Conditioning.RepeatFirstDataPoint + +(** +### Does it all work? Testing the engine and model + +Before being confident in the ability of our estimation engine to +be able to arrive at the correct solution, we must run a full test +of the model and estimation engine. + +Bristlecone includes the ``Bristlecone.testModel`` function, which +we use here. Given a model system and estimation engine, the function +generates a random parameter set (*θ*) many times; for each *θ*, the +'true' time-series are generated. The test result indicates the effectiveness +of the configuration at estimating *θ* given the auto-generated +time-series. If there is divergence, there is likely an +issue with your model or the Bristlecone configuration. + +Bristlecone includes many settings to configure the test +procedure. A simple test configuration is set as `Test.defaultSettings`, +but here we will configure some additional settings: +*) + +let testSettings = + Test.create + |> Test.addStartValues [ + "hare", 50. + "lynx", 75. ] + // |> Test.addNoise (Test.Noise.tryAddNormal "σ[y]" "lynx") + // |> Test.addNoise (Test.Noise.tryAddNormal "σ[x]" "hare") + |> Test.addGenerationRules [ + Test.GenerationRules.alwaysLessThan 10000. "lynx" + Test.GenerationRules.alwaysLessThan 10000. "hare" ] + |> Test.withTimeSeriesLength 30 + |> Test.endWhen (Optimisation.EndConditions.afterIteration 1) + +(** +In our `TestSettings`, we have specified the initial time point (t = 0) +for both modelled time-series. We have also added noise around +each generated time-series, and specified that each time-series +should be 30 years in length. + +With these test settings, we can now run the test. +*) + +let testResult = + ``predator-prey`` + |> Bristlecone.testModel engine testSettings +(*** include-fsi-merged-output ***) + +(** +We can check the test settings by... +*) + +module Graphing = + + // Testing out ggplot for graphics + open RProvider + open RProvider.Operators + open RProvider.ggplot2 + open RProvider.svglite + open RDotNet + + let (++) (a:SymbolicExpression) b = R.``+``([ a; b ]) + + let xyPlot (testResult:Result) = + + let data = + match testResult with + | Ok r -> + r.Series + |> Seq.collect(fun kv -> + kv.Value + |> Bristlecone.Time.TimeSeries.toObservations + |> Seq.map(fun (d,v) -> kv.Key, v, d.Fit, d.Obs)) + | Error _ -> [] + + let df = R.data_frame([ + "variable" => (data |> Seq.map(fun (a,_,_,_) -> a)) + "time" => (data |> Seq.map(fun (_,b,_,_) -> b)) + "modelled" => (data |> Seq.map(fun (_,_,c,_) -> c)) + "observed" => (data |> Seq.map(fun (_,_,_,d) -> d)) + ]) + + let s = R.svgstring() + + R.plot([ "x" => df?time; "y" => df?observed ]) + + // R.ggplot() + // ++ R.geom__line(R.aes(["x" => df?time, "y" => df?modelled, "group" => df?variable])) + // ++ R.geom__line(R.aes(["x" => df?time, "y" => df?observed, "group" => df?variable])) + // |> ignore + + let html = s.AsFunction().Invoke().GetValue() + RProvider.grDevices.R.dev_off() |> ignore + html + +let html = Graphing.xyPlot testResult +(*** include-it-raw ***) + +printfn "%s" html + +// // 3. Load in Real Data +// // ---------------------------- +// // Here, we are using the FSharp.Data type provider to read in a CSV file. + +// type PopulationData = FSharp.Data.CsvProvider<"/Users/andrewmartin/Documents/GitHub Projects/bristlecone/docs/examples/data/lynx-hare.csv"> +// let data = +// let csv = PopulationData.Load "/Users/andrewmartin/Documents/GitHub Projects/bristlecone/docs/examples/data/lynx-hare.csv" +// [ (code "hare").Value, Time.TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> float r.Hare, r.Year)) +// (code "lynx").Value, Time.TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> float r.Lynx, r.Year)) ] |> Map.ofList + + +// // 0. Configure Options +// // ---------------------------- + +// module Options = +// let iterations = 100000 + + +// // 4. Fit Model to Real Data +// // ----------------------------------- +// let result = ``predator-prey`` |> Bristlecone.fit engine (Optimisation.EndConditions.afterIteration Options.iterations) data + diff --git a/docs/examples/shrub-resource.fsx b/docs/examples/shrub-resource.fsx new file mode 100644 index 0000000..bdd24aa --- /dev/null +++ b/docs/examples/shrub-resource.fsx @@ -0,0 +1,480 @@ +(** +--- +title: Shrub response to a single limiting resource +category: Examples +categoryindex: 3 +index: 1 +--- +*) + +(*** condition: prepare ***) +#nowarn "211" +#r "../../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" +#r "nuget: MathNet.Numerics.FSharp,5.0.0" +(*** condition: fsx ***) +#if FSX +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // FSX +(*** condition: ipynb ***) +#if IPYNB +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // IPYNB + +//////////////////////////////////////////////////// +// Plant nitrogen limitation using wood rings +// and nitrogen isotopes +//////////////////////////////////////////////////// + +(* An example Bristlecone script for working with + wood ring datasets. *) + +open Bristlecone // Opens Bristlecone core library and estimation engine +open Bristlecone.Language // Open the language for writing Bristlecone models +open Bristlecone.Time + +// // 1. Define basic model system +// // ---------------------------- +// // First, we define a 'base model' into which we can insert +// // components that represent different hypotheses. + +// let baseModel = + +// /// Transform δ15N to N availability. +// let ``δ15N -> N availability`` = +// (Constant 100. * Environment "N" + Constant 309.) / Constant 359. + +// /// Plant uptake of N from soil, which may be turned on or off +// let uptake f geom = +// (geom This) * This * (f ``δ15N -> N availability``) + +// /// 1. Cumulative stem biomass +// let ``db/dt`` geom nLimitation = +// Parameter "r" * (uptake nLimitation geom) - Parameter "γ[b]" * This + +// /// 2. Soil nitrogen availability +// let ``dN/dt`` geom feedback limitationName nLimitation = +// if limitationName = "None" +// then Parameter "λ" - Parameter "γ[N]" * ``δ15N -> N availability`` + feedback This +// else +// Parameter "λ" - Parameter "γ[N]" * ``δ15N -> N availability`` +// + feedback This +// - (geom This) * This * (nLimitation ``δ15N -> N availability``) + +// /// 3. Stem radius (a 'Measurement' variable) +// let stemRadius lastRadius lastEnv env = +// let oldCumulativeMass = lastEnv |> lookup "bs" +// let newCumulativeMass = env |> lookup "bs" +// if (newCumulativeMass - oldCumulativeMass) > 0. +// then newCumulativeMass |> Allometric.Proxies.toRadiusMM +// else lastRadius + +// fun geom feedback (nLimitMode,nLimitation) -> +// Model.empty +// |> Model.addEquation "bs" (``db/dt`` geom nLimitation) +// |> Model.addEquation "N" (``dN/dt`` geom feedback nLimitMode nLimitation) +// |> Model.includeMeasure "x" stemRadius +// |> Model.estimateParameter "λ" notNegative 0.001 0.500 +// |> Model.estimateParameter "γ[N]" notNegative 0.001 0.200 +// |> Model.estimateParameter "γ[b]" notNegative 0.001 0.200 +// |> Model.useLikelihoodFunction (ModelLibrary.Likelihood.bivariateGaussian "x" "N") +// |> Model.estimateParameter "ρ" noConstraints -0.500 0.500 +// |> Model.estimateParameter "σ[x]" notNegative 0.001 0.100 +// |> Model.estimateParameter "σ[y]" notNegative 0.001 0.100 + + +// // 2. Define competing hypotheses +// // ---------------------------- +// /// Define 12 alternative hypotheses by defining three interchangeable components: +// /// - Asymptotic plant size (2 types); +// /// - Plant-soil feedback presence / absence (2 types); and +// /// - Nitrogen limitation form (3 types). +// /// The product of each of the three components with the base model forms 12 +// /// alternative hypotheses, each represented as a `ModelSystem`. +// let hypotheses = + +// // 1. Setup two alternatives for geometric mode. +// let chapmanRichards mass = Constant 1. - (mass / (Parameter "k" * Constant 1000.)) +// let geometricModes = modelComponent "Geometric constraint" [ +// subComponent "None" (Constant 1. |> (*)) +// subComponent "Chapman-Richards" chapmanRichards +// |> estimateParameter "k" notNegative 3.00 5.00 // Asymptotic biomass (in kilograms) +// ] + +// // 2. Setup two alternatives for plant-soil feedbacks. +// let biomassLoss biomass = (Parameter "ɑ" / Constant 100.) * biomass * Parameter "γ[b]" +// let feedbackModes = modelComponent "Plant-Soil Feedback" [ +// subComponent "None" (Constant 1. |> (*)) +// subComponent "Biomass Loss" biomassLoss +// |> estimateParameter "ɑ" notNegative 0.01 1.00 // N-recycling efficiency +// ] + +// // 3. Setup three alternatives for the form of plant N limitation. + +// let saturating minimumNutrient nutrient = +// let hollingModel n = (Parameter "a" * n) / (Constant 1. + (Parameter "a" * Parameter "b" * Parameter "h" * n)) +// Conditional(fun compute -> +// if compute (hollingModel minimumNutrient) < 1e-12 +// then Invalid +// else hollingModel nutrient ) + +// let linear min resource = +// Conditional(fun compute -> +// if compute (Parameter "a" * min) < 1e-12 then Invalid else Parameter "a" * resource) + +// let limitationModes = modelComponent "N-limitation" [ +// subComponent "Saturating" (saturating (Constant 5.)) +// |> estimateParameter "a" notNegative 0.100 0.400 +// |> estimateParameter "h" notNegative 0.100 0.400 +// |> estimateParameter "r" notNegative 0.500 1.000 +// subComponent "Linear" (linear (Constant 5.)) +// |> estimateParameter "a" notNegative 0.100 0.400 +// |> estimateParameter "r" notNegative 0.500 1.000 +// subComponent "None" (Constant 1. |> (*)) +// |> estimateParameter "r" notNegative 0.500 1.000 +// ] + +// baseModel +// |> Hypotheses.createFromComponent geometricModes +// |> Hypotheses.useAnother feedbackModes +// |> Hypotheses.useAnotherWithName limitationModes +// |> Hypotheses.compile + + +// // 3. Setup Bristlecone Engine +// // ---------------------------- +// // A bristlecone engine provides a fixed setup for estimating parameters from data. +// // Use the same engine for all model fits within a single study. + +// let engine = +// Bristlecone.mkContinuous +// |> Bristlecone.withContinuousTime Integration.MathNet.integrate +// |> Bristlecone.withConditioning Conditioning.RepeatFirstDataPoint +// |> Bristlecone.withTunedMCMC [ Optimisation.MonteCarlo.TuneMethod.CovarianceWithScale 0.200, 250, Optimisation.EndConditions.afterIteration 20000 ] + + +// // 4. Test Engine and Model +// // ---------------------------- +// // Running a full test is strongly recommended. The test will demonstrate if the current +// // configuration can find known parameters for a model. If this step fails, there is an +// // issue with either your model, or the Bristlecone configuration. + +// let testSettings = +// Test.create +// |> Test.addNoise (Test.Noise.tryAddNormal "sigma[y]" "N") +// |> Test.addNoise (Test.Noise.tryAddNormal "sigma[x]" "bs") +// |> Test.addGenerationRules [ +// Test.GenerationRules.alwaysMoreThan -3. "N" +// Test.GenerationRules.alwaysLessThan 20. "N" +// Test.GenerationRules.alwaysMoreThan 0. "bs" +// Test.GenerationRules.monotonicallyIncreasing "x" ] // There must be at least 10mm of wood production +// |> Test.addStartValues [ +// "x", 5.0 +// "bs", 5.0 |> Allometric.Proxies.toBiomassMM +// "N", 3.64 ] +// |> Test.withTimeSeriesLength 30 +// |> Test.endWhen (Optimisation.EndConditions.afterIteration 1000) + +// let testResult = +// hypotheses +// |> List.map(fst >> Bristlecone.testModel engine testSettings) + + +// // 5. Load Real Data +// // ---------------------------- +// // Here, we are using the Bristlecone.Dendro package to +// // read in dendroecological data. + +// open Bristlecone.Dendro + +// let shrubData = +// let plants = Data.PlantIndividual.loadRingWidths (__SOURCE_DIRECTORY__ + "/../data/yamal-rw.csv") +// let isotopeData = Data.PlantIndividual.loadLocalEnvironmentVariable (__SOURCE_DIRECTORY__ + "/../data/yuribei-d15N-imputed.csv") +// plants |> PlantIndividual.zipEnvMany "N" isotopeData + + +// // 6. Fit Models to Real Data +// // ----------------------------------- + + + +// // 7. Calculate model comparison statistics +// // ----------------------------------- + + +// // 3. Load Real Data and Estimate +// // ---------------------------- + +// // Define the start values for a model system, as follows: +// // initial radius = +// let startValues (startDate:System.DateTime) (plant:PlantIndividual.PlantIndividual) = +// let initialRadius = +// match plant.Growth with +// | PlantIndividual.PlantGrowth.RingWidth s -> +// match s with +// | GrowthSeries.Absolute c -> c.Head |> fst +// | _ -> invalidOp "Not applicable" +// | _ -> invalidOp "Not applicable" +// let initialMass = initialRadius |> ModelComponents.Proxies.toBiomassMM +// let initialNitrogen = plant.Environment.[code "N"].Head |> fst +// [ ("x", initialRadius) +// ("N", initialNitrogen) +// ("bs", initialMass) ] |> Map.ofList + +// let workPackages shrubs hypotheses engine saveDirectory = +// seq { +// for s in shrubs do + +// // 1. Arrange the subject and settings +// let shrub = s |> PlantIndividual.toCumulativeGrowth +// let common = shrub |> PlantIndividual.keepCommonYears +// let startDate = (common.Environment.["N"]).StartDate |> snd +// let startConditions = getStartValues startDate shrub +// let e = engine |> Bristlecone.withConditioning (Custom startConditions) + +// // 2. Setup batches of dependent analyses +// for h in [ 1 .. hypotheses |> List.length ] do +// for _ in [ 1 .. Options.chains ] do +// if h = 1 || h = 2 || h = 7 || h = 8 || h = 9 || h = 10 then yield async { +// // A. Compute result +// let result = Bristlecone.PlantIndividual.fit e Options.endWhen hypotheses.[h-1] common |> fst +// // B. Save to file +// Bristlecone.Data.EstimationResult.saveAll saveDirectory s.Identifier.Value h 1 result +// return result } +// } + +// // Orchestrate the analyses +// let work = workPackages shrubs hypotheses Options.engine Options.resultsDirectory +// let run() = work |> Seq.iter (OrchestrationMessage.StartWorkPackage >> Options.orchestrator.Post) + + +// let saveDiagnostics () = + +// // 1. Get all results sliced by plant and hypothesis +// let results = +// let get subject model modelId = Bristlecone.Data.EstimationResult.loadAll Options.resultsDirectory subject.Identifier.Value model modelId +// Bristlecone.ModelSelection.ResultSet.arrangeResultSets shrubs hypotheses get + +// // 2. Save convergence statistics to file +// // NB include only results within 5 likelihood of the minimum (to remove outliers) +// results +// // |> Seq.map(fun (x,a,b,c) -> x.Identifier.Value,a,b,c) +// // |> Seq.toList +// |> Diagnostics.Convergence.gelmanRubinAll 10000 +// |> Data.Convergence.save Options.resultsDirectory + +// // 3. Save Akaike weights to file +// results +// |> ModelSelection.Select.weights +// |> Seq.map(fun (x,a,b,c) -> x.Identifier.Value,a,b,c) +// |> Data.ModelSelection.save Options.resultsDirectory + +// // 4. Save out logged components +// // results +// // |> Seq.map(fun r -> calculateComponents fit Options.engine r) + + + + + +// // What about one-step-ahead predictions? + +// open Bristlecone.Diagnostics.ModelComponents +// open FSharp.Data + +// // 1. Load in all MLE results and pick the best for each shrub x hypothesis. +// // - Must convert from old bristlecone format to new one. + +// module LoadOldTrace = + +// open Bristlecone.Data.Trace +// open Bristlecone.Data.Config + +// type OldTrace = CsvProvider<"/Users/andrewmartin/Desktop/Bristlecone Results/Paper1-Yuribei-Annual/dphil-shrub-output-YUSL03A-1-92aadb09-f035-4373-aacd-a16ed7dec822.csv"> + +// let fileMatch directory subject modelId = +// let path = System.IO.DirectoryInfo(directory) +// if path.Exists then +// let files = path.GetFiles(sprintf "dphil-shrub-output-%s-%i-*.csv" subject modelId) +// let regex = sprintf "dphil-shrub-output-%s-%i-(%s).csv" subject modelId regexGuid +// files |> Seq.choose(fun f -> +// let m = System.Text.RegularExpressions.Regex.Match(f.Name, regex) +// if m.Success +// then +// let guid = m.Groups.[1].Value |> System.Guid.Parse +// (guid, f) |> Some +// else None ) +// else invalidArg "directory" "The specified directory does not exist" + +// let toTrace (data:OldTrace) : (float * float []) list = +// data.Rows +// |> Seq.groupBy(fun r -> r.Iteration) +// |> Seq.map(fun (i,r) -> +// i, +// (r |> Seq.head).NegativeLogLikelihood, +// r |> Seq.map(fun r -> r.ParameterValue) |> Seq.toArray) +// |> Seq.sortByDescending(fun (i,_,_) -> i) +// |> Seq.map(fun (_,x,y) -> x,y) +// |> Seq.toList + +// let loadOldTrace directory subject modelId = +// let traceFiles = fileMatch directory subject modelId +// traceFiles +// |> Seq.choose(fun (i,f) -> +// printfn "File loading is %s" f.FullName +// let data = OldTrace.Load f.FullName +// match data.Rows |> Seq.length with +// | 0 -> None +// | _ -> data |> toTrace |> Some ) + +// let oldShrubDir = "/Users/andrewmartin/Documents/DPhil Zoology/Bristlecone Results/YuribeiAnnual/" + +// let oldMleResults = +// seq { +// for s in shrubs do +// for hi in [ 1 .. 12 ] do +// let oldResult = LoadOldTrace.loadOldTrace oldShrubDir s.Identifier.Value hi |> Seq.concat |> Seq.toList +// let mle = oldResult |> Seq.minBy(fun (p,l) -> p) +// yield s, hi, mle +// } +// |> Seq.toList + + +// // 2. Setup each so that + +// module Cool = + +// open Bristlecone.Bristlecone +// open Bristlecone.Optimisation + +// /// "How good am I at predicting the next data point"? +// /// +// let oneStepAhead engine hypothesis (preTransform:CodedMap>->CodedMap>) (timeSeries) (estimatedTheta:ParameterPool) = +// let mleToBounds mlePool = mlePool |> Map.map(fun k v -> Parameter.create (Parameter.detatchConstraint v |> snd) (v |> Parameter.getEstimate) (v |> Parameter.getEstimate)) +// let hypothesisMle : ModelSystem = { hypothesis with Parameters = mleToBounds estimatedTheta } +// let pairedDataFrames = +// timeSeries +// |> Map.map(fun _ fitSeries -> +// fitSeries +// |> TimeSeries.toObservations +// |> Seq.pairwise +// |> Seq.map (fun (t1,t2) -> TimeSeries.fromObservations [t1; t2] |> TimeSeries.map(fun (x,y) -> x ))) +// printfn "Paired data frames = %A" pairedDataFrames +// let timeParcelCount = (pairedDataFrames |> Seq.head).Value |> Seq.length +// printfn "Time parcels: %i" timeParcelCount +// let data = +// seq { 1 .. timeParcelCount } +// |> Seq.map(fun i -> pairedDataFrames |> Map.map(fun _ v -> v |> Seq.item (i-1)) |> preTransform) +// printfn "Data: %A" data + +// // TODO Remove this hack: +// // It is predicting with a repeated first point... so... +// // The next point estimate is at t1 +// // The next point observation is at t2 +// data +// |> Seq.map (fun d -> +// let est = Bristlecone.fit (engine |> withCustomOptimisation Optimisation.None.passThrough |> withConditioning RepeatFirstDataPoint) (EndConditions.afterIteration 0) d hypothesisMle +// printfn "Estimated time series is %A" (est |> fst).Series + +// let nextObservation = d |> Map.map(fun c ts -> ts |> TimeSeries.toObservations |> Seq.skip 1 |> Seq.head) +// let paired = +// nextObservation +// |> Map.map(fun code obs -> +// let nextEstimate = ((est |> fst).Series.[code].Values |> Seq.head).Fit +// obs |> snd, obs |> fst, nextEstimate +// ) + +// paired +// ) +// |> Seq.toList + +// /// Perform n-step-ahead computation on the hypothesis and plant. +// let predictAhead (engine:EstimationEngine.EstimationEngine) system (plant:PlantIndividual) preTransform estimate = +// let g = +// match plant.Growth |> growthSeries with +// | Absolute g -> g +// | Cumulative g -> g +// | Relative g -> g +// let predictors = plant.Environment |> Map.add (ShortCode.create "x") g +// oneStepAhead engine system preTransform predictors estimate + + +// let oneStepPredictions = +// oldMleResults +// |> List.map(fun (s,hi,mle) -> + +// // 0. Convert x into biomass +// let preTransform (data:CodedMap>) = +// data +// |> Map.toList +// |> List.collect(fun (k,v) -> +// if k = code "x" +// then [ (k, v); (code "bs", v |> TimeSeries.map(fun (x,_) -> x |> ModelComponents.Proxies.toBiomassMM)) ] +// else [ (k, v)] ) +// |> Map.ofList + +// // 1. Arrange the subject and settings +// let shrub = s |> PlantIndividual.toCumulativeGrowth +// let common = shrub |> PlantIndividual.keepCommonYears +// let startDate = (common.Environment.[code "N"]).StartDate |> snd +// let startConditions = getStartValues startDate shrub +// let e = Options.engine |> Bristlecone.withConditioning (Custom startConditions) + +// let mlePool = +// hypotheses.[hi-1].Parameters +// |> Map.toList +// |> List.mapi(fun i (sc,p) -> +// let est = (mle |> snd).[i] +// sc, Parameter.setEstimate p est) +// |> ParameterPool + +// Cool.predictAhead e hypotheses.[hi-1] common preTransform mlePool +// |> List.map(fun r -> s.Identifier.Value, hi, r) +// ) + + +// type SOSRow = { +// Year: int +// Variable: string +// Shrub: string +// Hypothesis: int +// Observation: float +// OneStepPrediction: float +// } + +// let predictions = +// oneStepPredictions +// |> List.collect(fun m -> +// m |> Seq.collect(fun (s,h,m) -> +// m |> Seq.map(fun kv -> +// let y,a,b = kv.Value +// let x = +// { +// Year = y.Year +// Variable = kv.Key.Value +// Observation = a +// Shrub = s +// Hypothesis = h +// OneStepPrediction = b } +// x +// )) |> Seq.toList ) + +// predictions +// |> List.map(fun x -> sprintf "%s,%i,%i,%s,%f,%f" x.Shrub x.Hypothesis x.Year x.Variable x.Observation x.OneStepPrediction) +// |> List.append ["Shrub,Hypothesis,Year,Variable,Observed,OneStepPrediction"] +// |> String.concat "\n" +// |> fun x -> System.IO.File.WriteAllText("/Users/andrewmartin/Desktop/onestepahead.csv", x) + +// // Root mean squared error is the average squared differences, then square rooted. +// let rmse = +// predictions +// |> List.groupBy(fun x -> x.Shrub, x.Hypothesis, x.Variable) +// |> List.map(fun ((s,h,v),x) -> +// let sos = x |> Seq.averageBy(fun x -> (x.Observation - x.OneStepPrediction) ** 2.) +// s, h, v, sqrt sos) + +// rmse +// |> List.map(fun (s,h,v,sos) -> sprintf "%s,%i,%s,%f" s h v sos) +// |> List.append ["Shrub,Hypothesis,Variable,RMSE"] +// |> String.concat "\n" +// |> fun x -> System.IO.File.WriteAllText("/Users/andrewmartin/Desktop/onestepahead-2.csv", x) + diff --git a/docs/examples/tutorial.fsx b/docs/examples/tutorial.fsx new file mode 100644 index 0000000..d156232 --- /dev/null +++ b/docs/examples/tutorial.fsx @@ -0,0 +1,209 @@ +(** +--- +title: Quick Tour +category: Tutorials +categoryindex: 5 +index: 1 +--- +*) + +(*** condition: prepare ***) +#nowarn "211" +#r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" +(*** condition: fsx ***) +#if FSX +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // FSX +(*** condition: ipynb ***) +#if IPYNB +#r "nuget: Bristlecone,{{fsdocs-package-version}}" +#endif // IPYNB + +// (** +// Tutorial +// ======================== + +// * [Quick Start: Predator-Prey Dynamics](#quick-start-predator-prey-dynamics) +// * [Defining the model](#defining-the-model) +// * [The likelihood function](#The-likelihood-function) +// * [Putting it all together](#Putting-it-all-together) +// * [Does my model and method work?](#Does-my-model-and-method-work?) +// * [Fitting to observation data](#Fitting-to-observation-data) +// * [Time Modes (+ Integration)](#time-modes) +// * [Optimisation](#optimisation) +// * Working with Time Series +// * Setting a Likelihood Function +// * Measurement Variables +// * Environmental Forcing + +// ## Quick Start: Predator-Prey Dynamics + +// Here we use the classic example of snowshoe hare and lynx predator-prey dynamics, to demonstrate the basic functions of Bristlecone. The full example is in ``/samples/1. lynx-hare.fsx``. + +// In an F# script or interactive session, first load and open the Bristlecone library. + +// *) + +// #load "bristlecone.fsx" +// open Bristlecone +// open Bristlecone.ModelSystem + +// (**### Defining the model + +// A model system is defined as a series of differential equations, parameters, and a likelihood function. The model system is here defined as a pair of ordinary differential equations. + +// *) +// /// Number of snowshoe hares +// let dhdt' hare lynx alpha beta = +// alpha * hare - beta * hare * lynx + +// /// Number of lynx +// let dldt' lynx hare delta gamma = +// - gamma * lynx + delta * hare * lynx + + +// (**The parameters required for the model are defined as a ``CodedMap``. For the predator-prey model, they are defined as:*) + +// [ // Natural growth rate of hares in absence of predation +// ShortCode.create "alpha", Parameter.create Unconstrained 0.10 0.60 +// // Death rate per encounter of hares due to predation +// ShortCode.create "beta", Parameter.create Unconstrained 0.001 0.0135 +// // Efficiency of turning predated hares into lynx +// ShortCode.create "delta", Parameter.create Unconstrained 0.001 0.0135 +// // Natural death rate of lynx in the absence of food +// ShortCode.create "gamma", Parameter.create Unconstrained 0.10 0.60 +// ] |> Map.ofList + + +// (**### The likelihood function + +// For this example, we are simply using sum of squares as our measure of goodness of fit. Bristlecone includes sum of squares, and some neagtive log likelihood functions within the ``Bristlecone.Likelihood`` module. +// *) + +// ModelLibrary.Likelihood.sumOfSquares ["hare"; "lynx"] + +// (**### Putting it all together + +// The above models, parameters, and likelihood function must be tied together in a ``ModelSystem`` type. A complete model system for this problem is defined here: +// *) + +// let ``predator-prey`` = + +// let dhdt p _ x (e:Environment) = +// dhdt' x +// (e.[ShortCode.create "lynx"]) +// (p |> Pool.getEstimate "alpha") +// (p |> Pool.getEstimate "beta") + +// let dldt p _ y (e:Environment) = +// dldt' y +// (e.[ShortCode.create "hare"]) +// (p |> Pool.getEstimate "delta") +// (p |> Pool.getEstimate "gamma") + +// { Equations = [ code "hare", dhdt +// code "lynx", dldt ] |> Map.ofList +// Measures = [] |> Map.ofList +// Parameters = [ code "alpha", parameter PositiveOnly 0.10 0.60 +// code "beta", parameter PositiveOnly 0.001 0.0135 +// code "delta", parameter PositiveOnly 0.001 0.0135 +// code "gamma", parameter PositiveOnly 0.10 0.60 +// ] |> Map.ofList +// Likelihood = ModelLibrary.Likelihood.sumOfSquares ["hare"; "lynx"] } +// (** + +// The intermediate functions ``dhdt`` and ``dldt`` connect the raw ODEs to the functional signature required by Bristlecone. This feeds the current parameter estimates and variable values into the model. + +// The modelling protocol is defined by an ``EstimationEngine``, which can be reused for any number of analyses. +// *) + +// let engine = +// Bristlecone.mkContinuous +// |> Bristlecone.withConditioning RepeatFirstDataPoint + + +// (**### Does my model and method work? + +// It is recommended that any new model and method combination is tested, to verify that - given a known set of parameters - the same parameters are estimated. Bristlecone can draw many sets of random parameters and test that these can be correctly estimated: +// *) + +// let startValues = +// [ code "lynx", 30.09 +// code "hare", 19.58 ] |> Map.ofList + +// //``predator-prey`` |> Bristlecone.testModel engine Options.testSeriesLength startValues Options.iterations Options.burn + +// (** + +// ### Fitting to observation data + +// In the example script, we load data using FSharp.Data, from a CSV file. You can choose to load data using any method. The raw data is parsed into a Map of TimeSeries using the ``TimeSeries.createVarying`` function. +// *) + +// type PopulationData = FSharp.Data.CsvProvider<"../samples/data/lynx-hare.csv"> + +// let data = +// let csv = PopulationData.Load "../samples/data/lynx-hare.csv" +// [ code "hare", +// TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> r.Year, float r.Hare)) +// code "lynx", +// TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> r.Year, float r.Lynx)) +// ] |> Map.ofList + + +// (**To get model fitting results:*) + +// let result = +// ``predator-prey`` +// //|> Bristlecone.fit engine Options.iterations Options.burn data + + +// (**## Time Modes + +// Bristlecone can run models in either discrete time, or continuous time. When running models in continuous time, an integration function is required: +// *) + +// type TimeMode<'data, 'time> = +// | Discrete +// | Continuous of Integrate<'data, 'time> + +// and Integrate<'data,'time> = 'time -> 'time -> 'time -> CodedMap<'data> -> CodedMap -> CodedMap<'data[]> + +// (** +// Currently only fixed timesteps are supported, but variable timestep support (e.g. for sediment core data) is planned. + +// Two integration functions are included: + +// | Solver | Function | Description | +// | - | - | - | +// | Runge-Kutta 4 (MathNet Numerics) | ``Integration.MathNet.integrate`` | A fourth-order Runge Kutta method to provide approximate solutions to ODE systems. | +// | Runge-Kutta 547M (Open Solving Library for ODEs - Microsoft Research) | ``Integration.MsftOslo.integrate`` | A method based on classic Runge-Kutta, but with automatic error and step size control. [See the documentation](https://www.microsoft.com/en-us/research/wp-content/uploads/2014/07/osloUserGuide.pdf).| + +// ## Optimisation + +// Bristlecone supports optimisation functions that have the following type signature: + +// ```fsharp +// type Optimise<'data> = int -> int -> Domain -> ('data[] -> 'data) -> ('data * 'data []) list +// ``` + +// There are two optimsation techniques currently built-in: + +// | Method | Function | Description | +// | - | - | - | +// | Amoeba (Nelder-Mead) | ``Optimisation.Amoeba.solve`` | A gradient-descent method. | +// | MCMC Random Walk | ``Optimisation.MonteCarlo.randomWalk`` | A method based on classic Runge-Kutta, but with automatic error and step size control. [See the documentation](https://www.microsoft.com/en-us/research/wp-content/uploads/2014/07/osloUserGuide.pdf).| + + +// ## Estimation Engines + +// To use Bristlecone functions requires a configured ``EstimationEngine``. The easiest way is with the helper functions within the ``Bristlecone`` module: + +// | Function | Type | Description | +// | - | - | - | +// | ``mkContinuous`` | EstimationEngine | Default continuous engine | +// | ``mkDiscrete`` | EstimationEngine | Default discrete model engine | +// | ``withContinuousTime`` | t : Integrate<'a,'b> -> engine: EstimationEngine<'a,'b> -> EstimationEngine<'a,'b> | Transforms engine to continuous time mode, using the given integrator function. | +// | ``withConditioning`` | c: Conditioning -> engine: EstimationEngine<'a,'b> -> EstimationEngine<'a,'b> | Choose how the start point is chosen when solving the model system. | + +// **) \ No newline at end of file diff --git a/docs/index.fsx b/docs/index.fsx index fca179f..6166367 100644 --- a/docs/index.fsx +++ b/docs/index.fsx @@ -10,6 +10,7 @@ index: 1 (*** condition: prepare ***) #nowarn "211" #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" +#r "nuget: MathNet.Numerics.FSharp,5.0.0" (*** condition: fsx ***) #if FSX #r "nuget: Bristlecone,{{package-version}}" @@ -30,15 +31,16 @@ other fields that apply non-linear modelling techniques. ## Quick Start -Once you have installed [the .NET SDK](https://dot.net), the Bristlecone library -can be [installed from NuGet](https://nuget.org/packages/Bristlecone). +Bristlecone is an F# .NET library. You can easily get started by installing +[the latest .NET SDK](https://dot.net). You may then simply use the +Bristlecone package in a script, application, or library. +The nuget package [is available here](https://nuget.org/packages/Bristlecone). -### Use in an F# script +### To use in an F# script ![img/example.png](img/example.png) -Example -------- +## Example This example demonstrates the layout of a model when defined in Bristlecone. @@ -61,10 +63,13 @@ let hypothesis = let engine = Bristlecone.mkContinuous - |> Bristlecone.withTunedMCMC [ ]//Optimisation.MonteCarlo.TuneMethod.CovarianceWithScale 0.750, 200, Optimisation.EndConditions.afterIteration 25000 ] + |> Bristlecone.withCustomOptimisation (Optimisation.Amoeba.swarm 5 20 Optimisation.Amoeba.Solver.Default) let testSettings = Bristlecone.Test.TestSettings.Default -Bristlecone.testModel engine testSettings hypothesis +let testResult = Bristlecone.testModel engine testSettings hypothesis + +(*** include-fsi-output ***) + (** In the above snippet, a von Bertalanffy growth model is defined as a hypothesis to test. We then create an `EstimationEngine`, which defines the methodology for model-fitting. In Bristlecone, an `EstimationEngine` is created and customised using the F# forward pipe operator (for R users this may be familiar; this concept was adapted into the dplyr %>% operator). The call to `testModel` generates random test data, and assesses whether the model-fitting method can accurately estimate known parameters. diff --git a/docs/integration.fsx b/docs/integration.fsx index 55ec292..e88baf0 100644 --- a/docs/integration.fsx +++ b/docs/integration.fsx @@ -12,11 +12,11 @@ index: 3 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/language.fsx b/docs/language.fsx index 231e019..26c3b01 100644 --- a/docs/language.fsx +++ b/docs/language.fsx @@ -12,11 +12,11 @@ index: 2 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/logging.fsx b/docs/logging.fsx index 830782e..dac8b89 100644 --- a/docs/logging.fsx +++ b/docs/logging.fsx @@ -12,11 +12,11 @@ index: 3 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/model-fitting.fsx b/docs/model-fitting.fsx index 9d21357..6c8b08e 100644 --- a/docs/model-fitting.fsx +++ b/docs/model-fitting.fsx @@ -12,11 +12,11 @@ index: 2 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/model-selection.fsx b/docs/model-selection.fsx index c4b9158..381094f 100644 --- a/docs/model-selection.fsx +++ b/docs/model-selection.fsx @@ -12,11 +12,11 @@ index: 3 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** diff --git a/docs/model.fsx b/docs/model.fsx index 208157c..97f297f 100644 --- a/docs/model.fsx +++ b/docs/model.fsx @@ -12,11 +12,11 @@ index: 2 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** @@ -49,6 +49,11 @@ Bristlecone expressions.* let ``dN/dt`` = Parameter "r" * This * (Constant 1. - (This / Parameter "K")) +(** +From the above code, our model is compiled into the following internal representation: +*) +(*** include-value: ``dN/dt`` ***) + (** Our model of the change in *N* (population count) over time has two estimatable parameters defined using the `Parameter` term, which are *r* (the intrinsic growth rate of the population) diff --git a/docs/optimisation.fsx b/docs/optimisation.fsx index 0f74267..6711da2 100644 --- a/docs/optimisation.fsx +++ b/docs/optimisation.fsx @@ -12,11 +12,11 @@ index: 1 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** @@ -36,7 +36,8 @@ procedure can be used with Bristlecone by plugging in as follows: open Bristlecone -let myCustomOptimiser : EstimationEngine.Optimise = +let myCustomOptimiser : EstimationEngine.Optimiser = + EstimationEngine.InDetachedSpace <| fun writeOut n domain f -> invalidOp "Doesn't actually do anything!" let engine = @@ -69,11 +70,11 @@ uses a Cauchy distribution for jumps: let settings = MonteCarlo.SimulatedAnnealing.AnnealSettings.Default -let classicalSA : EstimationEngine.Optimise = - MonteCarlo.SimulatedAnnealing.classicalSimulatedAnnealing 0.01 false settings +// Classical Simulated Annealing: +MonteCarlo.SimulatedAnnealing.classicalSimulatedAnnealing 0.01 false settings -let fastSA : EstimationEngine.Optimise = - MonteCarlo.SimulatedAnnealing.fastSimulatedAnnealing 0.01 false settings +// Fast Simulated Annealing: +MonteCarlo.SimulatedAnnealing.fastSimulatedAnnealing 0.01 false settings (** The FSA approach enables greater exploration of more distant portions of @@ -100,6 +101,7 @@ It can be used as follows: *) let settingsNM = Amoeba.Solver.Default +(*** include-value: settingsNM ***) let single : EstimationEngine.Optimise = Amoeba.Solver.solve settingsNM @@ -117,8 +119,7 @@ can be used from: let levels = 5 let individuals = 20 -let swarmMode : EstimationEngine.Optimise = - Amoeba.swarm levels individuals settingsNM +Amoeba.swarm levels individuals settingsNM (** ### Monte Carlo methods @@ -140,15 +141,13 @@ which will be run before the final random walk. *) // Random walk with no tuning steps -let randomWalk : EstimationEngine.Optimise = - MonteCarlo.randomWalk [] +MonteCarlo.randomWalk [] // Random walk with 50,000 iterations of tuning, during // which the individual parameter jump sizes are scaled // every 500 iterations. -let randomWalkWithTuning : EstimationEngine.Optimise = - [ MonteCarlo.TuneMethod.Scale, 500, EndConditions.afterIteration 50000 ] - |> MonteCarlo.randomWalk +[ MonteCarlo.TuneMethod.Scale, 500, EndConditions.afterIteration 50000 ] +|> MonteCarlo.randomWalk (** @@ -162,8 +161,7 @@ parameter space. let weighting = 0.250 // Weight to give to recent history versus existing covariance structure let frequency = 200 // Tune the covariance structure every 200 iterations -let am : EstimationEngine.Optimise = - MonteCarlo.adaptiveMetropolis weighting frequency +MonteCarlo.adaptiveMetropolis weighting frequency (** @@ -186,8 +184,7 @@ let settingsFB = { TuneAfterChanges = 20 MinScaleChange = 0.01 BurnLength = EndConditions.afterIteration 100000 } -let optim : EstimationEngine.Optimise = - filzbach settingsFB +filzbach settingsFB (** #### 'Automatic' optimisation @@ -197,7 +194,7 @@ General-Purpose MCMC via New Adaptive Diagnostics" [Reference](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.70.7198&rep=rep1&type=pdf) *) -let auto = MonteCarlo.``Automatic (Adaptive Diagnostics)`` +MonteCarlo.``Automatic (Adaptive Diagnostics)`` (** #### Adaptive-Metropolis-within Gibbs @@ -208,7 +205,7 @@ Reference: Bai Y (2009). “An Adaptive Directional Metropolis-within-Gibbs Algo Technical Report in Department of Statistics at the University of Toronto. *) -let amwg = MonteCarlo.``Adaptive-Metropolis-within Gibbs`` +MonteCarlo.``Adaptive-Metropolis-within Gibbs`` (** #### Metropolis-within Gibbs @@ -217,4 +214,4 @@ A non-adaptive Metropolis-within-gibbs Sampler. Each parameter is updated individually, unlike the random walk algorithm. *) -let mwg = MonteCarlo.``Metropolis-within Gibbs`` +MonteCarlo.``Metropolis-within Gibbs`` diff --git a/docs/tutorial.fsx b/docs/tutorial.fsx deleted file mode 100644 index b788d05..0000000 --- a/docs/tutorial.fsx +++ /dev/null @@ -1,209 +0,0 @@ -(** ---- -title: Quick Tour -category: Tutorials -categoryindex: 5 -index: 1 ---- -*) - -(*** condition: prepare ***) -#nowarn "211" -#r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" -(*** condition: fsx ***) -#if FSX -#r "nuget: Bristlecone,{{package-version}}" -#endif // FSX -(*** condition: ipynb ***) -#if IPYNB -#r "nuget: Bristlecone,{{package-version}}" -#endif // IPYNB - -(** -Tutorial -======================== - -* [Quick Start: Predator-Prey Dynamics](#quick-start-predator-prey-dynamics) - * [Defining the model](#defining-the-model) - * [The likelihood function](#The-likelihood-function) - * [Putting it all together](#Putting-it-all-together) - * [Does my model and method work?](#Does-my-model-and-method-work?) - * [Fitting to observation data](#Fitting-to-observation-data) -* [Time Modes (+ Integration)](#time-modes) -* [Optimisation](#optimisation) -* Working with Time Series -* Setting a Likelihood Function -* Measurement Variables -* Environmental Forcing - -## Quick Start: Predator-Prey Dynamics - -Here we use the classic example of snowshoe hare and lynx predator-prey dynamics, to demonstrate the basic functions of Bristlecone. The full example is in ``/samples/1. lynx-hare.fsx``. - -In an F# script or interactive session, first load and open the Bristlecone library. - -*) - -#load "bristlecone.fsx" -open Bristlecone -open Bristlecone.ModelSystem - -(**### Defining the model - -A model system is defined as a series of differential equations, parameters, and a likelihood function. The model system is here defined as a pair of ordinary differential equations. - -*) -/// Number of snowshoe hares -let dhdt' hare lynx alpha beta = - alpha * hare - beta * hare * lynx - -/// Number of lynx -let dldt' lynx hare delta gamma = - - gamma * lynx + delta * hare * lynx - - -(**The parameters required for the model are defined as a ``CodedMap``. For the predator-prey model, they are defined as:*) - -[ // Natural growth rate of hares in absence of predation - ShortCode.create "alpha", Parameter.create Unconstrained 0.10 0.60 - // Death rate per encounter of hares due to predation - ShortCode.create "beta", Parameter.create Unconstrained 0.001 0.0135 - // Efficiency of turning predated hares into lynx - ShortCode.create "delta", Parameter.create Unconstrained 0.001 0.0135 - // Natural death rate of lynx in the absence of food - ShortCode.create "gamma", Parameter.create Unconstrained 0.10 0.60 - ] |> Map.ofList - - -(**### The likelihood function - -For this example, we are simply using sum of squares as our measure of goodness of fit. Bristlecone includes sum of squares, and some neagtive log likelihood functions within the ``Bristlecone.Likelihood`` module. -*) - -ModelLibrary.Likelihood.sumOfSquares ["hare"; "lynx"] - -(**### Putting it all together - -The above models, parameters, and likelihood function must be tied together in a ``ModelSystem`` type. A complete model system for this problem is defined here: -*) - -let ``predator-prey`` = - - let dhdt p _ x (e:Environment) = - dhdt' x - (e.[ShortCode.create "lynx"]) - (p |> Pool.getEstimate "alpha") - (p |> Pool.getEstimate "beta") - - let dldt p _ y (e:Environment) = - dldt' y - (e.[ShortCode.create "hare"]) - (p |> Pool.getEstimate "delta") - (p |> Pool.getEstimate "gamma") - - { Equations = [ code "hare", dhdt - code "lynx", dldt ] |> Map.ofList - Measures = [] |> Map.ofList - Parameters = [ code "alpha", parameter PositiveOnly 0.10 0.60 - code "beta", parameter PositiveOnly 0.001 0.0135 - code "delta", parameter PositiveOnly 0.001 0.0135 - code "gamma", parameter PositiveOnly 0.10 0.60 - ] |> Map.ofList - Likelihood = ModelLibrary.Likelihood.sumOfSquares ["hare"; "lynx"] } -(** - -The intermediate functions ``dhdt`` and ``dldt`` connect the raw ODEs to the functional signature required by Bristlecone. This feeds the current parameter estimates and variable values into the model. - -The modelling protocol is defined by an ``EstimationEngine``, which can be reused for any number of analyses. -*) - -let engine = - Bristlecone.mkContinuous - |> Bristlecone.withConditioning RepeatFirstDataPoint - - -(**### Does my model and method work? - -It is recommended that any new model and method combination is tested, to verify that - given a known set of parameters - the same parameters are estimated. Bristlecone can draw many sets of random parameters and test that these can be correctly estimated: -*) - -let startValues = - [ code "lynx", 30.09 - code "hare", 19.58 ] |> Map.ofList - -//``predator-prey`` |> Bristlecone.testModel engine Options.testSeriesLength startValues Options.iterations Options.burn - -(** - -### Fitting to observation data - -In the example script, we load data using FSharp.Data, from a CSV file. You can choose to load data using any method. The raw data is parsed into a Map of TimeSeries using the ``TimeSeries.createVarying`` function. -*) - -type PopulationData = FSharp.Data.CsvProvider<"../samples/data/lynx-hare.csv"> - -let data = - let csv = PopulationData.Load "../samples/data/lynx-hare.csv" - [ code "hare", - TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> r.Year, float r.Hare)) - code "lynx", - TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> r.Year, float r.Lynx)) - ] |> Map.ofList - - -(**To get model fitting results:*) - -let result = - ``predator-prey`` - //|> Bristlecone.fit engine Options.iterations Options.burn data - - -(**## Time Modes - -Bristlecone can run models in either discrete time, or continuous time. When running models in continuous time, an integration function is required: -*) - -type TimeMode<'data, 'time> = -| Discrete -| Continuous of Integrate<'data, 'time> - -and Integrate<'data,'time> = 'time -> 'time -> 'time -> CodedMap<'data> -> CodedMap -> CodedMap<'data[]> - -(** -Currently only fixed timesteps are supported, but variable timestep support (e.g. for sediment core data) is planned. - -Two integration functions are included: - -| Solver | Function | Description | -| - | - | - | -| Runge-Kutta 4 (MathNet Numerics) | ``Integration.MathNet.integrate`` | A fourth-order Runge Kutta method to provide approximate solutions to ODE systems. | -| Runge-Kutta 547M (Open Solving Library for ODEs - Microsoft Research) | ``Integration.MsftOslo.integrate`` | A method based on classic Runge-Kutta, but with automatic error and step size control. [See the documentation](https://www.microsoft.com/en-us/research/wp-content/uploads/2014/07/osloUserGuide.pdf).| - -## Optimisation - -Bristlecone supports optimisation functions that have the following type signature: - -```fsharp -type Optimise<'data> = int -> int -> Domain -> ('data[] -> 'data) -> ('data * 'data []) list -``` - -There are two optimsation techniques currently built-in: - -| Method | Function | Description | -| - | - | - | -| Amoeba (Nelder-Mead) | ``Optimisation.Amoeba.solve`` | A gradient-descent method. | -| MCMC Random Walk | ``Optimisation.MonteCarlo.randomWalk`` | A method based on classic Runge-Kutta, but with automatic error and step size control. [See the documentation](https://www.microsoft.com/en-us/research/wp-content/uploads/2014/07/osloUserGuide.pdf).| - - -## Estimation Engines - -To use Bristlecone functions requires a configured ``EstimationEngine``. The easiest way is with the helper functions within the ``Bristlecone`` module: - -| Function | Type | Description | -| - | - | - | -| ``mkContinuous`` | EstimationEngine | Default continuous engine | -| ``mkDiscrete`` | EstimationEngine | Default discrete model engine | -| ``withContinuousTime`` | t : Integrate<'a,'b> -> engine: EstimationEngine<'a,'b> -> EstimationEngine<'a,'b> | Transforms engine to continuous time mode, using the given integrator function. | -| ``withConditioning`` | c: Conditioning -> engine: EstimationEngine<'a,'b> -> EstimationEngine<'a,'b> | Choose how the start point is chosen when solving the model system. | - -**) \ No newline at end of file diff --git a/docs/workflow.fsx b/docs/workflow.fsx index 991b3e4..54ec078 100644 --- a/docs/workflow.fsx +++ b/docs/workflow.fsx @@ -12,11 +12,11 @@ index: 4 #r "../src/Bristlecone/bin/Release/netstandard2.0/Bristlecone.dll" (*** condition: fsx ***) #if FSX -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // FSX (*** condition: ipynb ***) #if IPYNB -#r "nuget: Bristlecone,{{package-version}}" +#r "nuget: Bristlecone,{{fsdocs-package-version}}" #endif // IPYNB (** @@ -116,17 +116,17 @@ the orchestration agent: let orchestrator = Orchestration.OrchestrationAgent(logger, System.Environment.ProcessorCount, false) -fun datasets hypotheses engine -> +// fun datasets hypotheses engine -> - // Orchestrate the analyses - let work = workPackages datasets hypotheses engine - let run() = - work - |> Seq.iter ( - Orchestration.OrchestrationMessage.StartWorkPackage - >> orchestrator.Post) +// // Orchestrate the analyses +// let work = workPackages datasets hypotheses engine +// let run() = +// work +// |> Seq.iter ( +// Orchestration.OrchestrationMessage.StartWorkPackage +// >> orchestrator.Post) - run() +// run() (** If the above code is supplied with datasets, hypotheses, and an diff --git a/samples/1-population-resource.fsx b/samples/1-population-resource.fsx deleted file mode 100755 index e62418a..0000000 --- a/samples/1-population-resource.fsx +++ /dev/null @@ -1,82 +0,0 @@ -#load "bristlecone.fsx" -// #nuget "Bristlecone" - -//////////////////////////////////////////////////// -/// Snowshoe Hare and Lynx Predator-Prey Dynamics -//////////////////////////////////////////////////// - -(* Testing out Bristlecone, using the 90-year data set - of snowshoe hare and lynx pelts purchased by the - Hudson's Bay Company of Canada. Data is in 1000s. *) - -open Bristlecone // Opens Bristlecone core library and estimation engine -open Bristlecone.Language // Open the language for writing Bristlecone models -open Bristlecone.Time - -// 0. Configure Options -// ---------------------------- - -module Options = - let iterations = 100000 - let testSeriesLength = 50 - - -// 1. Model System -// ---------------------------- - -let ``predator-prey`` = - - let ``dh/dt`` = Parameter "α" * This - Parameter "β" * This * Environment "lynx" - let ``dl/dt`` = - Parameter "γ" * This + Parameter "Δ" * Environment "hare" * This - - Model.empty - |> Model.addEquation "hare" ``dh/dt`` - |> Model.addEquation "lynx" ``dl/dt`` - |> Model.estimateParameter "α" noConstraints 0.10 0.60 // Natural growth rate of hares in absence of predation - |> Model.estimateParameter "β" noConstraints 0.10 0.60 // Death rate per encounter of hares due to predation - |> Model.estimateParameter "Δ" noConstraints 0.10 0.60 // Efficiency of turning predated hares into lynx - |> Model.estimateParameter "γ" noConstraints 0.10 0.60 // Natural death rate of lynx in the absence of food - |> Model.useLikelihoodFunction (ModelLibrary.Likelihood.sumOfSquares [ "hare"; "lynx" ]) - |> Model.compile - - -// 2. Setup Bristlecone Engine -// ---------------------------- -// A bristlecone engine provides a fixed setup for estimating parameters from data. -// Use the same engine for all model fits within a single study. -// This engine uses a gradident descent method (Nelder Mead simplex), and a basic -// Runge-Kutta 4 integration method provided by MathNet Numerics. - -let engine = - Bristlecone.mkContinuous - |> Bristlecone.withGradientDescent - |> Bristlecone.withContinuousTime Integration.MathNet.integrate - |> Bristlecone.withConditioning Conditioning.RepeatFirstDataPoint - - -// 3. Test Engine and Model -// ---------------------------- -// Running a full test is strongly recommended. The test will demonstrate if the current -// configuration can find known parameters for a model. If this step fails, there is an -// issue with either your model, or the Bristlecone configuration. - -let startValues = [ ShortCode.create "lynx", 30.09; ShortCode.create "hare", 19.58 ] |> Map.ofList - -// TODO Test settings new format -//``predator-prey`` |> Bristlecone.testModel engine Options.testSeriesLength startValues Options.iterations [] - - -// 3. Load in Real Data -// ---------------------------- -// Here, we are using the FSharp.Data type provider to read in a CSV file. - -type PopulationData = FSharp.Data.CsvProvider<"../samples/data/lynx-hare.csv"> -let data = - let csv = PopulationData.Load "../samples/data/lynx-hare.csv" - [ ShortCode.create "hare", TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> float r.Hare, r.Year)) - ShortCode.create "lynx", TimeSeries.fromObservations (csv.Rows |> Seq.map(fun r -> float r.Lynx, r.Year)) ] |> Map.ofList - - -// 4. Fit Model to Real Data -// ----------------------------------- -let result = ``predator-prey`` |> Bristlecone.fit engine (Optimisation.EndConditions.afterIteration Options.iterations) data diff --git a/samples/2-external-environment.fsx b/samples/2-external-environment.fsx deleted file mode 100644 index ac1b4b3..0000000 --- a/samples/2-external-environment.fsx +++ /dev/null @@ -1,133 +0,0 @@ -#load "bristlecone.fsx" - -//////////////////////////////////////////////////// -/// Growth of tree rings in response to temperature -//////////////////////////////////////////////////// - -// This Bristlecone example demonstrates model-fitting -// using high-resolution monthly temperature data with -// annual ring width increments. - -open Bristlecone // Opens Bristlecone core library and estimation engine -open Bristlecone.Language // Open the language for writing Bristlecone models -open Bristlecone.Time - -// 0. Configure Options -// ---------------------------- - -module Settings = - - /// Bristlecone uses latitude and longitude to compute day length. - let latitude, longitude = 63.2, 15.2 - let endWhen = Optimisation.EndConditions.afterIteration 1 - - -// 1. Model System -// ---------------------------- - -let hypothesis = - - /// The universal gas constant in J mol−1 K−1 - let gasConstant = Constant 8.314 - - /// An Arrhenius function to represent temperature limitation on growth. - /// Form of equation: https://pubag.nal.usda.gov/download/13565/PDF - let temperatureLimitation = - Constant System.Math.E ** ( - (Constant 1000. * Parameter "Ea" * (Environment "temperature" - Constant 298.)) - / (Constant 298. * gasConstant * Environment "temperature")) - - /// Plant growth is a function of net photosynthesis minus environmental losses. - /// Photosynthesis is limited by light and temperature. - let ``db/dt`` = This * Parameter "r" * temperatureLimitation - Parameter "γB" * This - - Model.empty - |> Model.addEquation "stem radius" ``db/dt`` - |> Model.estimateParameter "Ea" noConstraints 40.0 50.0 - |> Model.estimateParameter "γB" noConstraints 0.01 0.40 - |> Model.estimateParameter "r" notNegative 0.01 1.00 - |> Model.useLikelihoodFunction (ModelLibrary.Likelihood.sumOfSquares [ "stem radius" ]) - |> Model.compile - - -// 2. Setup Bristlecone Engine -// ---------------------------- -// A bristlecone engine provides a fixed setup for estimating parameters from data. -// Use the same engine for all model fits within a single study. - -let engine = - Bristlecone.mkContinuous - |> Bristlecone.withContinuousTime Integration.MathNet.integrate - |> Bristlecone.withConditioning Conditioning.RepeatFirstDataPoint - |> Bristlecone.withTunedMCMC [ Optimisation.MonteCarlo.TuneMethod.CovarianceWithScale 0.200, 250, Optimisation.EndConditions.afterIteration 20000 ] - - -// 3. Test Engine and Model -// ---------------------------- -// Running a full test is strongly recommended. The test will demonstrate if the current -// configuration can find known parameters for a model. If this step fails, there is an -// issue with either your model, or the Bristlecone configuration. - -let startValues = [ ShortCode.create "lynx", 30.09; ShortCode.create "hare", 19.58 ] |> Map.ofList - -// TODO Test settings new format - -hypothesis |> Bristlecone.testModel engine Options.testSeriesLength startValues Options.iterations [] - - -// 4. Load Real Data -// ---------------------------- -// Here, we are using the FSharp.Data type provider to read in .csv datasets. -// We prepare the monthly temperature dataset for the analysis by: -// a) Loading the daily dataset using the FSharp.Data library; -// b) Replacing NaN values with the F# 'None' option type; -// c) Converting the dataset into a Bristlecone time series type; -// d) Interpolate the 'None' values by linear interpolation to fill in missing measurements; -// e) Generalise the time series from daily to monthly by applying an average function. - -open FSharp.Data - -// TODO Is there a way to streamline this? -[] -let DailyTemperatureUrl = __SOURCE_DIRECTORY__ + "/data/mean-temperature-daily.csv" - -let meanTemperatureMonthly = - let maxTemperatures = CsvProvider.Load DailyTemperatureUrl - maxTemperatures.Rows - |> Seq.map(fun r -> ( (if System.Double.IsNaN r.``T[avg]`` then None else Some (r.``T[avg]`` + 273.15)), r.Date)) - |> TimeSeries.fromObservations - |> TimeSeries.interpolate - |> TimeSeries.generalise (FixedTemporalResolution.Months (PositiveInt.create 1)) (fun x -> x |> Seq.averageBy fst) - - -module Test = - - open Bristlecone.Test - - let settings = TestSettings.Default - - let testSettings = { - Resolution = Years 1 - TimeSeriesLength = 30 - StartValues = [ code "b", 5. - code "t", 255. ] |> Map.ofList - EndCondition = Settings.endWhen - GenerationRules = [ "b" |> GenerationRules.alwaysLessThan 1000000. - "b" |> GenerationRules.alwaysMoreThan 0. - code "b", fun data -> (data |> Seq.max) - (data |> Seq.min) > 100. ] - NoiseGeneration = fun p data -> data - EnvironmentalData = [ code "t", TemperatureData.monthly ] |> Map.ofList - Random = MathNet.Numerics.Random.MersenneTwister() - StartDate = System.DateTime(1970,01,01) - Attempts = 50000 } - - let run () = - hypothesis - |> Bristlecone.testModel Settings.engine testSettings - - -let testResult = Test.run() - -// 4. Fit Model to Real Data -// ----------------------------------- -let result = hypothesis |> Bristlecone.fit engine (Optimisation.EndConditions.afterIteration Options.iterations) data diff --git a/samples/3-shrub-nitrogen.fsx b/samples/3-shrub-nitrogen.fsx deleted file mode 100644 index ac4e7c7..0000000 --- a/samples/3-shrub-nitrogen.fsx +++ /dev/null @@ -1,462 +0,0 @@ -#load "bristlecone.fsx" -#load "components/allometric.fsx" -// #nuget: Bristlecone - -//////////////////////////////////////////////////// -// Plant nitrogen limitation using wood rings -// and nitrogen isotopes -//////////////////////////////////////////////////// - -(* An example Bristlecone script for working with - wood ring datasets. *) - -open Bristlecone // Opens Bristlecone core library and estimation engine -open Bristlecone.Language // Open the language for writing Bristlecone models -open Bristlecone.Time - -// 1. Define basic model system -// ---------------------------- -// First, we define a 'base model' into which we can insert -// components that represent different hypotheses. - -let baseModel = - - /// Transform δ15N to N availability. - let ``δ15N -> N availability`` = - (Constant 100. * Environment "N" + Constant 309.) / Constant 359. - - /// Plant uptake of N from soil, which may be turned on or off - let uptake f geom = - (geom This) * This * (f ``δ15N -> N availability``) - - /// 1. Cumulative stem biomass - let ``db/dt`` geom nLimitation = - Parameter "r" * (uptake nLimitation geom) - Parameter "γ[b]" * This - - /// 2. Soil nitrogen availability - let ``dN/dt`` geom feedback limitationName nLimitation = - if limitationName = "None" - then Parameter "λ" - Parameter "γ[N]" * ``δ15N -> N availability`` + feedback This - else - Parameter "λ" - Parameter "γ[N]" * ``δ15N -> N availability`` - + feedback This - - (geom This) * This * (nLimitation ``δ15N -> N availability``) - - /// 3. Stem radius (a 'Measurement' variable) - let stemRadius lastRadius lastEnv env = - let oldCumulativeMass = lastEnv |> lookup "bs" - let newCumulativeMass = env |> lookup "bs" - if (newCumulativeMass - oldCumulativeMass) > 0. - then newCumulativeMass |> Allometric.Proxies.toRadiusMM - else lastRadius - - fun geom feedback (nLimitMode,nLimitation) -> - Model.empty - |> Model.addEquation "bs" (``db/dt`` geom nLimitation) - |> Model.addEquation "N" (``dN/dt`` geom feedback nLimitMode nLimitation) - |> Model.includeMeasure "x" stemRadius - |> Model.estimateParameter "λ" notNegative 0.001 0.500 - |> Model.estimateParameter "γ[N]" notNegative 0.001 0.200 - |> Model.estimateParameter "γ[b]" notNegative 0.001 0.200 - |> Model.useLikelihoodFunction (ModelLibrary.Likelihood.bivariateGaussian "x" "N") - |> Model.estimateParameter "ρ" noConstraints -0.500 0.500 - |> Model.estimateParameter "σ[x]" notNegative 0.001 0.100 - |> Model.estimateParameter "σ[y]" notNegative 0.001 0.100 - - -// 2. Define competing hypotheses -// ---------------------------- -/// Define 12 alternative hypotheses by defining three interchangeable components: -/// - Asymptotic plant size (2 types); -/// - Plant-soil feedback presence / absence (2 types); and -/// - Nitrogen limitation form (3 types). -/// The product of each of the three components with the base model forms 12 -/// alternative hypotheses, each represented as a `ModelSystem`. -let hypotheses = - - // 1. Setup two alternatives for geometric mode. - let chapmanRichards mass = Constant 1. - (mass / (Parameter "k" * Constant 1000.)) - let geometricModes = modelComponent "Geometric constraint" [ - subComponent "None" (Constant 1. |> (*)) - subComponent "Chapman-Richards" chapmanRichards - |> estimateParameter "k" notNegative 3.00 5.00 // Asymptotic biomass (in kilograms) - ] - - // 2. Setup two alternatives for plant-soil feedbacks. - let biomassLoss biomass = (Parameter "ɑ" / Constant 100.) * biomass * Parameter "γ[b]" - let feedbackModes = modelComponent "Plant-Soil Feedback" [ - subComponent "None" (Constant 1. |> (*)) - subComponent "Biomass Loss" biomassLoss - |> estimateParameter "ɑ" notNegative 0.01 1.00 // N-recycling efficiency - ] - - // 3. Setup three alternatives for the form of plant N limitation. - - let saturating minimumNutrient nutrient = - let hollingModel n = (Parameter "a" * n) / (Constant 1. + (Parameter "a" * Parameter "b" * Parameter "h" * n)) - Conditional(fun compute -> - if compute (hollingModel minimumNutrient) < 1e-12 - then Invalid - else hollingModel nutrient ) - - let linear min resource = - Conditional(fun compute -> - if compute (Parameter "a" * min) < 1e-12 then Invalid else Parameter "a" * resource) - - let limitationModes = modelComponent "N-limitation" [ - subComponent "Saturating" (saturating (Constant 5.)) - |> estimateParameter "a" notNegative 0.100 0.400 - |> estimateParameter "h" notNegative 0.100 0.400 - |> estimateParameter "r" notNegative 0.500 1.000 - subComponent "Linear" (linear (Constant 5.)) - |> estimateParameter "a" notNegative 0.100 0.400 - |> estimateParameter "r" notNegative 0.500 1.000 - subComponent "None" (Constant 1. |> (*)) - |> estimateParameter "r" notNegative 0.500 1.000 - ] - - baseModel - |> Hypotheses.createFromComponent geometricModes - |> Hypotheses.useAnother feedbackModes - |> Hypotheses.useAnotherWithName limitationModes - |> Hypotheses.compile - - -// 3. Setup Bristlecone Engine -// ---------------------------- -// A bristlecone engine provides a fixed setup for estimating parameters from data. -// Use the same engine for all model fits within a single study. - -let engine = - Bristlecone.mkContinuous - |> Bristlecone.withContinuousTime Integration.MathNet.integrate - |> Bristlecone.withConditioning Conditioning.RepeatFirstDataPoint - |> Bristlecone.withTunedMCMC [ Optimisation.MonteCarlo.TuneMethod.CovarianceWithScale 0.200, 250, Optimisation.EndConditions.afterIteration 20000 ] - - -// 4. Test Engine and Model -// ---------------------------- -// Running a full test is strongly recommended. The test will demonstrate if the current -// configuration can find known parameters for a model. If this step fails, there is an -// issue with either your model, or the Bristlecone configuration. - -let testSettings = - Test.create - |> Test.addNoise (Test.Noise.tryAddNormal "sigma[y]" "N") - |> Test.addNoise (Test.Noise.tryAddNormal "sigma[x]" "bs") - |> Test.addGenerationRules [ - Test.GenerationRules.alwaysMoreThan -3. "N" - Test.GenerationRules.alwaysLessThan 20. "N" - Test.GenerationRules.alwaysMoreThan 0. "bs" - Test.GenerationRules.monotonicallyIncreasing "x" ] // There must be at least 10mm of wood production - |> Test.addStartValues [ - "x", 5.0 - "bs", 5.0 |> Allometric.Proxies.toBiomassMM - "N", 3.64 ] - |> Test.withTimeSeriesLength 30 - |> Test.endWhen (Optimisation.EndConditions.afterIteration 1000) - -let testResult = - hypotheses - |> List.map(fst >> Bristlecone.testModel engine testSettings) - - -// 5. Load Real Data -// ---------------------------- -// Here, we are using the Bristlecone.Dendro package to -// read in dendroecological data. - -open Bristlecone.Dendro - -let shrubData = - let plants = Data.PlantIndividual.loadRingWidths (__SOURCE_DIRECTORY__ + "/../data/yamal-rw.csv") - let isotopeData = Data.PlantIndividual.loadLocalEnvironmentVariable (__SOURCE_DIRECTORY__ + "/../data/yuribei-d15N-imputed.csv") - plants |> PlantIndividual.zipEnvMany "N" isotopeData - - -// 6. Fit Models to Real Data -// ----------------------------------- - - - -// 7. Calculate model comparison statistics -// ----------------------------------- - - -// 3. Load Real Data and Estimate -// ---------------------------- - -// Define the start values for a model system, as follows: -// initial radius = -let startValues (startDate:System.DateTime) (plant:PlantIndividual.PlantIndividual) = - let initialRadius = - match plant.Growth with - | PlantIndividual.PlantGrowth.RingWidth s -> - match s with - | GrowthSeries.Absolute c -> c.Head |> fst - | _ -> invalidOp "Not applicable" - | _ -> invalidOp "Not applicable" - let initialMass = initialRadius |> ModelComponents.Proxies.toBiomassMM - let initialNitrogen = plant.Environment.[code "N"].Head |> fst - [ ("x", initialRadius) - ("N", initialNitrogen) - ("bs", initialMass) ] |> Map.ofList - -let workPackages shrubs hypotheses engine saveDirectory = - seq { - for s in shrubs do - - // 1. Arrange the subject and settings - let shrub = s |> PlantIndividual.toCumulativeGrowth - let common = shrub |> PlantIndividual.keepCommonYears - let startDate = (common.Environment.["N"]).StartDate |> snd - let startConditions = getStartValues startDate shrub - let e = engine |> Bristlecone.withConditioning (Custom startConditions) - - // 2. Setup batches of dependent analyses - for h in [ 1 .. hypotheses |> List.length ] do - for _ in [ 1 .. Options.chains ] do - if h = 1 || h = 2 || h = 7 || h = 8 || h = 9 || h = 10 then yield async { - // A. Compute result - let result = Bristlecone.PlantIndividual.fit e Options.endWhen hypotheses.[h-1] common |> fst - // B. Save to file - Bristlecone.Data.EstimationResult.saveAll saveDirectory s.Identifier.Value h 1 result - return result } - } - -// Orchestrate the analyses -let work = workPackages shrubs hypotheses Options.engine Options.resultsDirectory -let run() = work |> Seq.iter (OrchestrationMessage.StartWorkPackage >> Options.orchestrator.Post) - - -let saveDiagnostics () = - - // 1. Get all results sliced by plant and hypothesis - let results = - let get subject model modelId = Bristlecone.Data.EstimationResult.loadAll Options.resultsDirectory subject.Identifier.Value model modelId - Bristlecone.ModelSelection.ResultSet.arrangeResultSets shrubs hypotheses get - - // 2. Save convergence statistics to file - // NB include only results within 5 likelihood of the minimum (to remove outliers) - results - // |> Seq.map(fun (x,a,b,c) -> x.Identifier.Value,a,b,c) - // |> Seq.toList - |> Diagnostics.Convergence.gelmanRubinAll 10000 - |> Data.Convergence.save Options.resultsDirectory - - // 3. Save Akaike weights to file - results - |> ModelSelection.Select.weights - |> Seq.map(fun (x,a,b,c) -> x.Identifier.Value,a,b,c) - |> Data.ModelSelection.save Options.resultsDirectory - - // 4. Save out logged components - // results - // |> Seq.map(fun r -> calculateComponents fit Options.engine r) - - - - - -// What about one-step-ahead predictions? - -open Bristlecone.Diagnostics.ModelComponents -open FSharp.Data - -// 1. Load in all MLE results and pick the best for each shrub x hypothesis. -// - Must convert from old bristlecone format to new one. - -module LoadOldTrace = - - open Bristlecone.Data.Trace - open Bristlecone.Data.Config - - type OldTrace = CsvProvider<"/Users/andrewmartin/Desktop/Bristlecone Results/Paper1-Yuribei-Annual/dphil-shrub-output-YUSL03A-1-92aadb09-f035-4373-aacd-a16ed7dec822.csv"> - - let fileMatch directory subject modelId = - let path = System.IO.DirectoryInfo(directory) - if path.Exists then - let files = path.GetFiles(sprintf "dphil-shrub-output-%s-%i-*.csv" subject modelId) - let regex = sprintf "dphil-shrub-output-%s-%i-(%s).csv" subject modelId regexGuid - files |> Seq.choose(fun f -> - let m = System.Text.RegularExpressions.Regex.Match(f.Name, regex) - if m.Success - then - let guid = m.Groups.[1].Value |> System.Guid.Parse - (guid, f) |> Some - else None ) - else invalidArg "directory" "The specified directory does not exist" - - let toTrace (data:OldTrace) : (float * float []) list = - data.Rows - |> Seq.groupBy(fun r -> r.Iteration) - |> Seq.map(fun (i,r) -> - i, - (r |> Seq.head).NegativeLogLikelihood, - r |> Seq.map(fun r -> r.ParameterValue) |> Seq.toArray) - |> Seq.sortByDescending(fun (i,_,_) -> i) - |> Seq.map(fun (_,x,y) -> x,y) - |> Seq.toList - - let loadOldTrace directory subject modelId = - let traceFiles = fileMatch directory subject modelId - traceFiles - |> Seq.choose(fun (i,f) -> - printfn "File loading is %s" f.FullName - let data = OldTrace.Load f.FullName - match data.Rows |> Seq.length with - | 0 -> None - | _ -> data |> toTrace |> Some ) - -let oldShrubDir = "/Users/andrewmartin/Documents/DPhil Zoology/Bristlecone Results/YuribeiAnnual/" - -let oldMleResults = - seq { - for s in shrubs do - for hi in [ 1 .. 12 ] do - let oldResult = LoadOldTrace.loadOldTrace oldShrubDir s.Identifier.Value hi |> Seq.concat |> Seq.toList - let mle = oldResult |> Seq.minBy(fun (p,l) -> p) - yield s, hi, mle - } - |> Seq.toList - - -// 2. Setup each so that - -module Cool = - - open Bristlecone.Bristlecone - open Bristlecone.Optimisation - - /// "How good am I at predicting the next data point"? - /// - let oneStepAhead engine hypothesis (preTransform:CodedMap>->CodedMap>) (timeSeries) (estimatedTheta:ParameterPool) = - let mleToBounds mlePool = mlePool |> Map.map(fun k v -> Parameter.create (Parameter.detatchConstraint v |> snd) (v |> Parameter.getEstimate) (v |> Parameter.getEstimate)) - let hypothesisMle : ModelSystem = { hypothesis with Parameters = mleToBounds estimatedTheta } - let pairedDataFrames = - timeSeries - |> Map.map(fun _ fitSeries -> - fitSeries - |> TimeSeries.toObservations - |> Seq.pairwise - |> Seq.map (fun (t1,t2) -> TimeSeries.fromObservations [t1; t2] |> TimeSeries.map(fun (x,y) -> x ))) - printfn "Paired data frames = %A" pairedDataFrames - let timeParcelCount = (pairedDataFrames |> Seq.head).Value |> Seq.length - printfn "Time parcels: %i" timeParcelCount - let data = - seq { 1 .. timeParcelCount } - |> Seq.map(fun i -> pairedDataFrames |> Map.map(fun _ v -> v |> Seq.item (i-1)) |> preTransform) - printfn "Data: %A" data - - // TODO Remove this hack: - // It is predicting with a repeated first point... so... - // The next point estimate is at t1 - // The next point observation is at t2 - data - |> Seq.map (fun d -> - let est = Bristlecone.fit (engine |> withCustomOptimisation Optimisation.None.passThrough |> withConditioning RepeatFirstDataPoint) (EndConditions.afterIteration 0) d hypothesisMle - printfn "Estimated time series is %A" (est |> fst).Series - - let nextObservation = d |> Map.map(fun c ts -> ts |> TimeSeries.toObservations |> Seq.skip 1 |> Seq.head) - let paired = - nextObservation - |> Map.map(fun code obs -> - let nextEstimate = ((est |> fst).Series.[code].Values |> Seq.head).Fit - obs |> snd, obs |> fst, nextEstimate - ) - - paired - ) - |> Seq.toList - - /// Perform n-step-ahead computation on the hypothesis and plant. - let predictAhead (engine:EstimationEngine.EstimationEngine) system (plant:PlantIndividual) preTransform estimate = - let g = - match plant.Growth |> growthSeries with - | Absolute g -> g - | Cumulative g -> g - | Relative g -> g - let predictors = plant.Environment |> Map.add (ShortCode.create "x") g - oneStepAhead engine system preTransform predictors estimate - - -let oneStepPredictions = - oldMleResults - |> List.map(fun (s,hi,mle) -> - - // 0. Convert x into biomass - let preTransform (data:CodedMap>) = - data - |> Map.toList - |> List.collect(fun (k,v) -> - if k = code "x" - then [ (k, v); (code "bs", v |> TimeSeries.map(fun (x,_) -> x |> ModelComponents.Proxies.toBiomassMM)) ] - else [ (k, v)] ) - |> Map.ofList - - // 1. Arrange the subject and settings - let shrub = s |> PlantIndividual.toCumulativeGrowth - let common = shrub |> PlantIndividual.keepCommonYears - let startDate = (common.Environment.[code "N"]).StartDate |> snd - let startConditions = getStartValues startDate shrub - let e = Options.engine |> Bristlecone.withConditioning (Custom startConditions) - - let mlePool = - hypotheses.[hi-1].Parameters - |> Map.toList - |> List.mapi(fun i (sc,p) -> - let est = (mle |> snd).[i] - sc, Parameter.setEstimate p est) - |> ParameterPool - - Cool.predictAhead e hypotheses.[hi-1] common preTransform mlePool - |> List.map(fun r -> s.Identifier.Value, hi, r) - ) - - -type SOSRow = { - Year: int - Variable: string - Shrub: string - Hypothesis: int - Observation: float - OneStepPrediction: float -} - -let predictions = - oneStepPredictions - |> List.collect(fun m -> - m |> Seq.collect(fun (s,h,m) -> - m |> Seq.map(fun kv -> - let y,a,b = kv.Value - let x = - { - Year = y.Year - Variable = kv.Key.Value - Observation = a - Shrub = s - Hypothesis = h - OneStepPrediction = b } - x - )) |> Seq.toList ) - -predictions -|> List.map(fun x -> sprintf "%s,%i,%i,%s,%f,%f" x.Shrub x.Hypothesis x.Year x.Variable x.Observation x.OneStepPrediction) -|> List.append ["Shrub,Hypothesis,Year,Variable,Observed,OneStepPrediction"] -|> String.concat "\n" -|> fun x -> System.IO.File.WriteAllText("/Users/andrewmartin/Desktop/onestepahead.csv", x) - -// Root mean squared error is the average squared differences, then square rooted. -let rmse = - predictions - |> List.groupBy(fun x -> x.Shrub, x.Hypothesis, x.Variable) - |> List.map(fun ((s,h,v),x) -> - let sos = x |> Seq.averageBy(fun x -> (x.Observation - x.OneStepPrediction) ** 2.) - s, h, v, sqrt sos) - -rmse -|> List.map(fun (s,h,v,sos) -> sprintf "%s,%i,%s,%f" s h v sos) -|> List.append ["Shrub,Hypothesis,Variable,RMSE"] -|> String.concat "\n" -|> fun x -> System.IO.File.WriteAllText("/Users/andrewmartin/Desktop/onestepahead-2.csv", x) -