From 7b2ffedcee561a084cf5e19a8067c9e67072fc37 Mon Sep 17 00:00:00 2001 From: Olivier Labayle Date: Thu, 25 Jul 2024 10:08:26 +0100 Subject: [PATCH] up TargetedEstimation dep --- Manifest.toml | 4 ++-- src/cli.jl | 6 +++--- src/inputs_from_config.jl | 10 +++++++--- src/plots.jl | 21 ++++----------------- test/inputs_from_gwas_config.jl | 1 + test/runtests.jl | 17 +++++++++-------- test/testutils.jl | 28 ++++------------------------ 7 files changed, 30 insertions(+), 57 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index f689f0f..7ea5b8b 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -2379,8 +2379,8 @@ version = "1.10.0" [[deps.TargetedEstimation]] deps = ["ArgParse", "Arrow", "CSV", "CategoricalArrays", "Combinatorics", "Configurations", "DataFrames", "EvoTrees", "GLMNet", "HDF5", "JLD2", "JSON", "MKL", "MLJ", "MLJBase", "MLJLinearModels", "MLJModelInterface", "MLJModels", "MLJXGBoostInterface", "Mmap", "MultipleTesting", "Optim", "PackageCompiler", "Random", "Serialization", "TMLE", "Tables", "YAML"] -git-tree-sha1 = "98ae29ef85f5bbeb088b1003c58ced0e0e6104d3" -repo-rev = "acc1de14e05517b6b5e1fe441526f097edb8eb19" +git-tree-sha1 = "9cf3987b177a22ca374b9b11ab7a89de5bdd56b3" +repo-rev = "f4620ef1565f71019fa539e746ff109727e6671c" repo-url = "https://github.com/TARGENE/TargetedEstimation.jl" uuid = "2573d147-4098-46ba-9db2-8608d210ccac" version = "0.9.0" diff --git a/src/cli.jl b/src/cli.jl index 46d391d..431bc02 100644 --- a/src/cli.jl +++ b/src/cli.jl @@ -109,8 +109,8 @@ function cli_settings() end @add_arg_table s["summary-plots"] begin - "results-prefix" - help = "Prefix to result files." + "results-file" + help = "Path to the results file." required = true "--outprefix" @@ -201,7 +201,7 @@ function julia_main()::Cint ) elseif cmd == "summary-plots" summary_plots( - cmd_settings["results-prefix"], + cmd_settings["results-file"], outprefix=cmd_settings["outprefix"], verbosity=cmd_settings["verbosity"], ) diff --git a/src/inputs_from_config.jl b/src/inputs_from_config.jl index abd7cca..7d84454 100644 --- a/src/inputs_from_config.jl +++ b/src/inputs_from_config.jl @@ -121,6 +121,11 @@ function estimands_from_flat_list(estimands_configs, dataset, variants, outcomes return estimands end +function treatments_from_variant(variant::Symbol, dataset) + variant_levels = sort(levels(dataset[!, variant], skipmissing=true)) + return NamedTuple{(variant,)}([variant_levels]) +end + function gwas_estimands(dataset, variants, outcomes, confounders; outcome_extra_covariates=[], positivity_constraint=0., @@ -128,9 +133,8 @@ function gwas_estimands(dataset, variants, outcomes, confounders; ) estimands = [] verbosity > 0 && @info(string("Generating GWAS estimands.")) - for v in variants - variant_levels = sort(levels(dataset[!, v], skipmissing=true)) - treatments = NamedTuple{(Symbol(v),)}([variant_levels]) + for variant in Symbol.(variants) + treatments = treatments_from_variant(variant, dataset) try_append_new_estimands!( estimands, dataset, diff --git a/src/plots.jl b/src/plots.jl index 9a54094..d6c37ef 100644 --- a/src/plots.jl +++ b/src/plots.jl @@ -5,22 +5,9 @@ log10_uniform_quantiles(n) = -log10.(collect(LinRange(0., 1., n + 1))[2:end]) log10_beta_quantiles(n, alpha) = -log10.([quantile(Beta(k, n + 1 − k), alpha) for k in 1:n]) -function read_results_file(file) - jldopen(file) do io - return reduce(vcat, (io[key] for key in keys(io))) - end -end - -function read_results_files(prefix) - directory_, prefix_ = splitdir(prefix) - directory = directory_ == "" ? "." : directory_ - matching_files = [joinpath(directory_, file) for file in readdir(directory) if startswith(file, prefix_)] - reduce(vcat, (read_results_file(file) for file in matching_files)) -end - -function load_results(file_or_prefix; verbosity=0) +function load_results(resultsfile; verbosity=0) verbosity > 0 && @info "Loading results." - results = isfile(file_or_prefix) ? read_results_file(file_or_prefix) : read_results_files(file_or_prefix) + results = jldopen(io -> io["results"], resultsfile) estimators = collect(key for key ∈ keys(first(results)) if key !== :SAMPLE_IDS) results_df = DataFrame([[r[id] for r in results] for id in 1:length(estimators)], estimators) for estimator in estimators @@ -57,10 +44,10 @@ function qqplot(results, outprefix) return fig end -function summary_plots(results_prefix; +function summary_plots(results_file; outprefix="final", verbosity=0 ) - results = load_results(results_prefix; verbosity=verbosity) + results = load_results(results_file; verbosity=verbosity) qqplot(results, outprefix) end \ No newline at end of file diff --git a/test/inputs_from_gwas_config.jl b/test/inputs_from_gwas_config.jl index e3a7880..f7a9412 100644 --- a/test/inputs_from_gwas_config.jl +++ b/test/inputs_from_gwas_config.jl @@ -104,4 +104,5 @@ end end end + true \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 0984f86..a841969 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,11 +2,12 @@ using TargeneCore using Test TESTDIR = joinpath(pkgdir(TargeneCore), "test") - -@test include(joinpath(TESTDIR, "utils.jl")) -@test include(joinpath(TESTDIR, "dataset.jl")) -@test include(joinpath(TESTDIR, "plots.jl")) -@test include(joinpath(TESTDIR, "confounders.jl")) -@test include(joinpath(TESTDIR, "inputs_from_estimands.jl")) -@test include(joinpath(TESTDIR, "inputs_from_config.jl")) -@test include(joinpath(TESTDIR, "inputs_from_gwas_config.jl")) \ No newline at end of file +@testset "Test TargeneCore" begin + @test include(joinpath(TESTDIR, "utils.jl")) + @test include(joinpath(TESTDIR, "dataset.jl")) + @test include(joinpath(TESTDIR, "plots.jl")) + @test include(joinpath(TESTDIR, "confounders.jl")) + @test include(joinpath(TESTDIR, "inputs_from_estimands.jl")) + @test include(joinpath(TESTDIR, "inputs_from_config.jl")) + @test include(joinpath(TESTDIR, "inputs_from_gwas_config.jl")) +end diff --git a/test/testutils.jl b/test/testutils.jl index 7656260..a1c3c6d 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -109,36 +109,16 @@ end function save(estimates; prefix="tmle_output") outputs = TargetedEstimation.Outputs( - json=TargetedEstimation.JSONOutput(filename=prefix*".json"), - jls=TargetedEstimation.JLSOutput(filename=prefix*".jls"), - hdf5=TargetedEstimation.HDF5Output(filename=prefix*".hdf5") + json=prefix*".json", + jls=prefix*".jls", + hdf5=prefix*".hdf5" ) - TargetedEstimation.initialize(outputs) - batches = collect(Iterators.partition(estimates, 2)) - nbatches = length(batches) - for (batchid, batch) in enumerate(batches) - # Append JSON Output - TargetedEstimation.update_file(outputs.json, batch; finalize=nbatches==batchid) - # Append JLS Output - TargetedEstimation.update_file(outputs.jls, batch) - # Append HDF5 Output - TargetedEstimation.update_file(outputs.hdf5, batch) - end + TargetedEstimation.write(outputs, estimates) end make_fake_outputs(estimates_generator=make_estimates; prefix="tmle_output") = save(estimates_generator(); prefix=prefix) -function clean(;prefix="tmle_output") - dir_, prefix_ = splitdir(prefix) - dir = dir_ == "" ? "." : dir_ - for filename in readdir(dir) - if startswith(filename, prefix_) - rm(joinpath(dir_, filename)) - end - end -end - ### Fixtures for inputs_from_estimands function make_estimands_configuration()