-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
66af05f
commit e7c25bb
Showing
7 changed files
with
191 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
"""
    BatchManager(outprefix, max_batch_size)

Mutable bookkeeping state for writing estimands to disk in batches.

# Fields
- `outprefix`: prefix from which batch file names are built (see `save_batch!`).
- `max_batch_size`: an `Int`, or `nothing`.
  NOTE(review): presumably `nothing` means "no size limit" -- confirm against callers.
- `current_batch_id`: initialised to `1`; not read by any code visible in this file.
- `current_estimands`: estimands accumulated for the batch being built; starts empty.
- `current_batch_size`: starts at `0`; reset to `0` by `save_batch!`.
- `batch_id`: index passed to `batch_name` for the next batch file; starts at `1` and is
  incremented by `save_batch!`.
"""
mutable struct BatchManager
    outprefix::String
    max_batch_size::Union{Int, Nothing}
    current_batch_id::Int
    current_estimands::Vector
    current_batch_size::Int
    batch_id::Int

    # Only the prefix and the size limit are caller-supplied; every counter starts at its
    # initial value and the estimand buffer starts empty.
    function BatchManager(outprefix, max_batch_size)
        return new(outprefix, max_batch_size, 1, [], 0, 1)
    end
end
|
||
"""
    save_batch!(batch_saver::BatchManager, groupname)

Serialize the estimands currently held by `batch_saver` and reset it for the next batch.

The target file name is obtained from `batch_name`, called with
`"<outprefix>.<groupname>"` and the current `batch_id`. The estimands are wrapped in a
`Configuration` before being serialized. Afterwards `batch_id` is incremented, the
estimand buffer is replaced by a new empty vector and `current_batch_size` is set to `0`.

The value of the final assignment (`0`) is what the function evaluates to.
"""
function save_batch!(batch_saver::BatchManager, groupname)
    group_prefix = string(batch_saver.outprefix, ".", groupname)
    batchfilename = batch_name(group_prefix, batch_saver.batch_id)
    serialize(batchfilename, Configuration(estimands=batch_saver.current_estimands))
    batch_saver.batch_id += 1
    # Rebind to a new empty vector; the previous vector object is left untouched.
    batch_saver.current_estimands = []
    batch_saver.current_batch_size = 0
end
|
||
""" | ||
Estimate variant effect sizes across the genome. The intended steps are:
1. Load the merged {.bed, .bim, .fam} files with SnpArrays; these files are produced by the RunPCALoco process.
2. Load the corresponding PC files, which are also produced by the RunPCALoco process.
3. Iterate through the merged genotype files and compute factorialATE() for each variant on a specified target.
""" | ||
function loco_gwas(parsed_args)
    # Work-in-progress entry point. It reads the command-line arguments, the traits and
    # PC tables and the YAML configuration, derives the variable roles, and then starts
    # assembling a dataset. Estimand generation is still commented out below.
    # Always returns 0.
    outprefix = parsed_args["out-prefix"]
    batch_saver = BatchManager(outprefix, parsed_args["batch-size"])
    call_threshold = parsed_args["call-threshold"]
    bgen_prefix = parsed_args["bgen-prefix"]
    bim_prefix = parsed_args["bim-prefix"]
    positivity_constraint = parsed_args["positivity-constraint"]
    traits = read_data(parsed_args["traits"])
    # work out logic for PCs
    pcs = read_data(parsed_args["pcs"])
    # logic for config needs to be updated
    config = YAML.load_file(parsed_args["loco-gwas"]["config"])

    # Variables
    # variants_config = config["variants"]
    # Optional config entries: converted to Symbols when present, `[]` otherwise.
    config_symbols(key) = haskey(config, key) ? Symbol.(config[key]) : []
    extra_treatments = config_symbols("extra_treatments")
    outcome_extra_covariates = config_symbols("outcome_extra_covariates")
    extra_confounders = config_symbols("extra_confounders")
    confounders = all_confounders(pcs, extra_confounders)
    # Every trait column that is not the sample id, a confounder, a covariate or a
    # treatment is treated as an outcome.
    nonoutcomes = Set(vcat(:SAMPLE_ID, extra_confounders, outcome_extra_covariates, extra_treatments))
    outcomes = filter(x -> x ∉ nonoutcomes, Symbol.(names(traits)))

    # Genotypes and final dataset
    variants_set = read_bim(bim_prefix)
    # genotypes = call_genotypes(bgen_prefix, variants_set, call_threshold)

    dataset = DataFrame(Arrow.Table())
    # NOTE(review): `snparray` is not defined anywhere in the visible code, so this loop
    # will raise an UndefVarError unless it is a global defined elsewhere -- confirm.
    # NOTE(review): every iteration assigns to the same literal column `:x`, so only the
    # last iteration's values survive; presumably one column per variant is intended.
    for col in snparray
        x = convert(Vector{Float64}, [col])
        dataset[!, :x] = x
    end

    # dataset = merge(traits, pcs, genotypes)
    # println(dataset)
    # Arrow.write(string(outprefix, ".data.arrow"), dataset)

    # # Estimands
    # for estimand_type in config["estimands"]
    #     if estimand_type == "IATE"
    #         orders = config["orders"]
    #         generate_iates!(batch_saver, dataset, variants_config, outcomes, confounders;
    #             extra_treatments=extra_treatments,
    #             outcome_extra_covariates=outcome_extra_covariates,
    #             positivity_constraint=positivity_constraint,
    #             orders=orders
    #         )
    #     else
    #         throw(ArgumentError(string("Unknown estimand type: ", estimand_type)))
    #     end
    # end

    return 0
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Estimand configuration.
# NOTE(review): presumably this is the file loaded by `loco_gwas` via `YAML.load_file`;
# confirm against the test that references it.
orders: [2, 3]
estimands:
  - IATE
# Variants grouped per transcription factor, split into bQTLs and eQTLs.
variants:
  TF1:
    bQTLs:
      - RSID_17
      - RSID_99
    eQTLs:
      - RSID_102
  TF2:
    bQTLs:
      - RSID_17
      - RSID_198
    eQTLs:
      - RSID_2
extra_treatments:
  - TREAT_1
outcome_extra_covariates:
  - COV_1
extra_confounders:
  - 21003
  - 22001
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
module TestLOCOGWAS

using Test
using TargeneCore
using Arrow
using DataFrames
using Serialization

# Root of the package's test directory; fixture paths below are built from it.
TESTDIR = joinpath(pkgdir(TargeneCore), "test")

include(joinpath(TESTDIR, "tmle_inputs", "test_utils.jl"))

@testset "Test LOCO-GWAS: with positivity constraint" begin
    tmpdir = mktempdir()
    parsed_args = Dict(
        "loco-gwas" => Dict{String, Any}(
            "config" => joinpath(TESTDIR, "data", "gwas_config.yaml"),
        ),
        "traits" => joinpath(TESTDIR, "data", "traits_1.csv"), # TODO: investigate
        "pcs" => joinpath(TESTDIR, "data", "pcs.csv"), # TODO: investigate
        "%COMMAND%" => "loco-gwas",
        "out-prefix" => joinpath(tmpdir, "final"),
        "bgen-prefix" => joinpath(TESTDIR, "data", "ukbb", "imputed", "ukb_53116"),
        "bim-prefix" => joinpath(TESTDIR, "data", "ukbb", "genotypes", "ukb_53116"),
        "call-threshold" => 0.8,
        "batch-size" => nothing,
        "positivity-constraint" => 0.01,
    )
    # Smoke test only: there are no `@test` assertions yet, so this testset merely
    # checks that the call completes without throwing.
    # NOTE(review): presumably `tmle_inputs` dispatches on "%COMMAND%" to the LOCO-GWAS
    # entry point -- confirm.
    tmle_inputs(parsed_args)
end

end