Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #11

Merged
merged 3 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ serde_yaml = "0.9.32"
clap = { version = "4.5.1", features = ["derive"] }
itertools = "0.12.1"
assert_fs = "1.1.1"
rand_distr = "0.5.0-alpha.0"
rand_distr = "0.5.0-alpha.0"
45 changes: 14 additions & 31 deletions config/neat_test.yml
Original file line number Diff line number Diff line change
@@ -1,34 +1,17 @@
reference: "data/ecoli.fa"
read_len: .
coverage: .
ploidy: .
paired_ended: .
fragment_mean: .
fragment_st_dev: .
read_len: 10
coverage: 3
ploidy: 9
paired_ended: true
fragment_mean: 10.0
fragment_st_dev: 11.0
mutation_rate: 0.111

produce_bam: .
produce_vcf: .
produce_fasta: .
produce_fastq: .
produce_bam: true
produce_vcf: true
produce_fasta: true
produce_fastq: false

error_model: .
mutation_model: .
fragment_model: .
gc_model: .

partition_mode: .
threads: .
avg_seq_error: .
rescale_qualities: .
quality_offset: .
include_vcf: .
target_bed: .
off_target_scalar: .
discard_bed: .
mutation_rate: .
mutation_bed: .
no_coverage_bias: .
rng_seed: .
min_mutations: .
fasta_per_ploid: .
overwrite_output: .
overwrite_output: true
output_dir: .
output_prefix: testing
2 changes: 1 addition & 1 deletion config/simple_template.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
reference: REQUIRED
reference: .
read_len: .
coverage: 20
mutation_rate: .
Expand Down
85 changes: 7 additions & 78 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,19 @@ extern crate itertools;

mod utils;

use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::fs::File;
use clap::{Parser};
use log::*;
use simplelog::*;
use rand::thread_rng;
use rand::prelude::*;

use utils::cli;
use utils::fasta_tools::{read_fasta, write_fasta};
use utils::config::{read_config_yaml, build_config_from_args};
use utils::mutate::mutate_fasta;
use utils::make_reads::generate_reads;
use utils::fastq_tools::write_fastq;
use utils::file_tools::check_parent_and_create;
use utils::vcf_tools::write_vcf;
use utils::file_tools::check_parent;
use utils::runner::run_neat;

fn main() {
fn main() -> Result<(), std::fmt::Error> {

info!("Begin processing");
// parse the arguments from the command line
Expand All @@ -45,7 +40,7 @@ fn main() {
};

// Check that the parent dir exists
let log_destination = check_parent_and_create(&args.log_dest).unwrap();
let log_destination = check_parent(&args.log_dest).unwrap();

CombinedLogger::init(vec![
#[cfg(feature = "termcolor")]
Expand Down Expand Up @@ -77,73 +72,7 @@ fn main() {
Ok(build_config_from_args(args).expect("Problem reading configuration yaml file"))
}.unwrap();

// Create the prefix of the files to write
let output_file = format!("{}/{}", config.output_dir, config.output_prefix);

// Reading the reference file into memory
info!("Mapping reference fasta file: {}", &config.reference);
let (fasta_map, fasta_order) = read_fasta(&config.reference);

// Mutating the reference and recording the variant locations.
info!("Mutating reference.");
let (mutated_map, variant_locations) = mutate_fasta(
&fasta_map,
&mut rng
);

if config.produce_fasta {
info!("Outputting fasta file");
write_fasta(
&mutated_map,
&fasta_order,
config.overwrite_output,
&output_file,
).expect("Problem writing fasta file");
}

if config.produce_vcf {
info!("Writing vcf file");
write_vcf(
&variant_locations,
&fasta_order,
config.ploidy,
&config.reference,
config.overwrite_output,
&output_file,
&mut rng).expect("Error writing vcf file")
}

let mut read_sets: HashSet<Vec<u8>> = HashSet::new();
for (_name, sequence) in mutated_map.iter() {
// defined as a set of read sequences that should cover
// the mutated sequence `coverage` number of times
let data_set = generate_reads(
sequence,
&config.read_len,
&config.coverage,
config.paired_ended,
config.fragment_mean,
config.fragment_st_dev,
&mut rng
);

read_sets.extend(*data_set);
}

if config.produce_fastq {
info!("Shuffling output fastq data");
let mut outsets: Box<Vec<&Vec<u8>>> = Box::new(read_sets.iter().collect());
outsets.shuffle(&mut rng);

info!("Writing fastq");
write_fastq(
&output_file,
config.overwrite_output,
config.paired_ended,
*outsets,
).expect("Problem writing fastq file");
info!("Processing complete")
}

run_neat(config, &mut rng).unwrap();
Ok(())
}

5 changes: 4 additions & 1 deletion src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,7 @@ pub mod cli;
pub mod make_reads;
pub mod mutate;
pub mod fastq_tools;
pub mod vcf_tools;
pub mod vcf_tools;
pub mod nucleotides;

pub mod runner;
Loading