From 59063f593fef054d178c211d5f832286f8f6f6a0 Mon Sep 17 00:00:00 2001
From: Joshua Allen <jallen17@illinois.edu>
Date: Tue, 19 Mar 2024 01:42:48 -0500
Subject: [PATCH 1/2] added tests. Overal coverage is at 64%, but making
 progress in utils at 74%.

---
 config/neat_test.yml       |  45 ++++--------
 config/simple_template.yml |   2 +-
 src/main.rs                |   4 +-
 src/utils/config.rs        | 146 +++++++++++++++++++++++++++++++------
 src/utils/fasta_tools.rs   |  81 +++++++++++---------
 src/utils/fastq_tools.rs   |  72 ++++++++++++++++++
 src/utils/file_tools.rs    |  28 +++++--
 src/utils/make_reads.rs    |  35 ++++++++-
 8 files changed, 312 insertions(+), 101 deletions(-)
diff --git a/config/neat_test.yml b/config/neat_test.yml
index b230922..482b93e 100644
--- a/config/neat_test.yml
+++ b/config/neat_test.yml
@@ -1,34 +1,17 @@
 reference: "data/ecoli.fa"
-read_len: .
-coverage: .
-ploidy: .
-paired_ended: .
-fragment_mean: .
-fragment_st_dev: .
+read_len: 10
+coverage: 3
+ploidy: 9
+paired_ended: true
+fragment_mean: 10.0
+fragment_st_dev: 11.0
+mutation_rate: 0.111
 
-produce_bam: .
-produce_vcf: .
-produce_fasta: .
-produce_fastq: .
+produce_bam: true
+produce_vcf: true
+produce_fasta: true
+produce_fastq: false
 
-error_model: .
-mutation_model: .
-fragment_model: .
-gc_model: .
-
-partition_mode: .
-threads: .
-avg_seq_error: .
-rescale_qualities: .
-quality_offset: .
-include_vcf: .
-target_bed: .
-off_target_scalar: .
-discard_bed: .
-mutation_rate: .
-mutation_bed: .
-no_coverage_bias: .
-rng_seed: .
-min_mutations: .
-fasta_per_ploid: .
-overwrite_output: .
+overwrite_output: true
+output_dir: /home/joshfactorial/code/neat_output
+output_prefix: testing
diff --git a/config/simple_template.yml b/config/simple_template.yml
index 3af3026..c8ed324 100644
--- a/config/simple_template.yml
+++ b/config/simple_template.yml
@@ -1,4 +1,4 @@
-reference: REQUIRED
+reference: .
 read_len: .
 coverage: 20
 mutation_rate: .
diff --git a/src/main.rs b/src/main.rs
index 0785e57..f475e4f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -22,7 +22,7 @@ use utils::config::{read_config_yaml, build_config_from_args};
 use utils::mutate::mutate_fasta;
 use utils::make_reads::generate_reads;
 use utils::fastq_tools::write_fastq;
-use utils::file_tools::check_parent_and_create;
+use utils::file_tools::check_parent;
 use utils::vcf_tools::write_vcf;
 
 fn main() {
@@ -45,7 +45,7 @@ fn main() {
     };
 
     // Check that the parent dir exists
-    let log_destination = check_parent_and_create(&args.log_dest).unwrap();
+    let log_destination = check_parent(&args.log_dest).unwrap();
 
     CombinedLogger::init(vec![
         #[cfg(feature = "termcolor")]
diff --git a/src/utils/config.rs b/src/utils/config.rs
index 495202a..5a2d1ff 100644
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@@ -38,8 +38,8 @@ pub struct RunConfiguration {
     pub mutation_rate: f64,
     pub ploidy: usize,
     pub paired_ended: bool,
-    pub fragment_mean: f64,
-    pub fragment_st_dev: f64,
+    pub fragment_mean: Option<f64>,
+    pub fragment_st_dev: Option<f64>,
     pub produce_fastq: bool,
     pub produce_fasta: bool,
     pub produce_vcf:  bool,
@@ -58,7 +58,6 @@ impl RunConfiguration {
 
 // The config builder allows us to construct a config in multiple different ways, depending
 // on the input.
-#[derive(Default)]
 pub struct ConfigBuilder {
     reference: String,
     read_len: usize,
@@ -66,8 +65,8 @@ pub struct ConfigBuilder {
     mutation_rate: f64,
     ploidy: usize,
     paired_ended: bool,
-    fragment_mean: f64,
-    fragment_st_dev: f64,
+    fragment_mean: Option<f64>,
+    fragment_st_dev: Option<f64>,
     produce_fastq: bool,
     produce_fasta: bool,
     produce_vcf:  bool,
@@ -93,8 +92,8 @@ impl ConfigBuilder {
             mutation_rate: 0.001,
             ploidy: 2,
             paired_ended: false,
-            fragment_mean: 300.0,
-            fragment_st_dev: 30.0,
+            fragment_mean: None,
+            fragment_st_dev: None,
             produce_fastq: true,
             produce_fasta: false,
             produce_vcf: false,
@@ -137,12 +136,12 @@ impl ConfigBuilder {
     }
 
     pub fn set_fragment_mean(mut self, fragment_mean: f64) -> ConfigBuilder {
-        self.fragment_mean = fragment_mean;
+        self.fragment_mean = Option::from(fragment_mean);
         self
     }
 
     pub fn set_fragment_st_dev(mut self, fragment_st_dev: f64) -> ConfigBuilder {
-        self.fragment_st_dev = fragment_st_dev;
+        self.fragment_st_dev = Option::from(fragment_st_dev);
         self
     }
 
@@ -197,7 +196,7 @@ impl ConfigBuilder {
             return Err(ConfigError::ConfigurationError);
         }
         if self.paired_ended {
-            if self.fragment_mean.is_nan() | self.fragment_st_dev.is_nan() {
+            if self.fragment_mean.is_none() | self.fragment_st_dev.is_none() {
                 error!(
                     "Paired ended is set to true, but fragment mean \
                     and standard deviation were not set."
@@ -205,8 +204,8 @@ impl ConfigBuilder {
                 return Err(ConfigError::ConfigurationError);
             }
             if self.produce_fastq {
-                debug!("\t> fragment mean: {}", self.fragment_mean);
-                debug!("\t> fragment standard deviation: {}", self.fragment_st_dev);
+                debug!("\t> fragment mean: {}", self.fragment_mean.unwrap());
+                debug!("\t> fragment standard deviation: {}", self.fragment_st_dev.unwrap());
                 debug!("Producing fastq files:\n\t> {}_r1.fastq\n\t {}_r2.fastq",
                     file_prefix, file_prefix
                 )
@@ -270,7 +269,7 @@ pub fn read_config_yaml(yaml: String) -> Result<Box<RunConfiguration>, ConfigErr
     for (key, value) in scrape_config {
         match key.as_str() {
             "reference" => {
-                if value != ".".to_string() {
+                if !(value == ".".to_string() || (value == "REQUIRED".to_string())) {
                     config_builder = config_builder.set_reference(value)
                 } else {
                     error!("Reference is required.");
@@ -405,8 +404,8 @@ mod tests {
             mutation_rate: 0.09,
             ploidy: 3,
             paired_ended: true,
-            fragment_mean: 333.0,
-            fragment_st_dev: 33.0,
+            fragment_mean: Option::from(333.0),
+            fragment_st_dev: Option::from(33.0),
             produce_fastq: false,
             produce_bam: true,
             produce_fasta: true,
@@ -416,14 +415,16 @@ mod tests {
             output_prefix: String::from("Hey.hey")
         };
 
+        println!("{:?}", test_configuration);
+
         assert_eq!(test_configuration.reference, "Hello.world".to_string());
         assert_eq!(test_configuration.read_len, 100);
         assert_eq!(test_configuration.coverage, 22);
         assert_eq!(test_configuration.mutation_rate, 0.09);
         assert_eq!(test_configuration.ploidy, 3);
         assert_eq!(test_configuration.paired_ended, true);
-        assert_eq!(test_configuration.fragment_mean, 333.0);
-        assert_eq!(test_configuration.fragment_st_dev, 33.0);
+        assert_eq!(test_configuration.fragment_mean.unwrap(), 333.0);
+        assert_eq!(test_configuration.fragment_st_dev.unwrap(), 33.0);
         assert_eq!(test_configuration.produce_fastq, false);
         assert_eq!(test_configuration.produce_vcf, true);
         assert_eq!(test_configuration.produce_bam, true);
@@ -445,7 +446,21 @@ mod tests {
         let yaml = String::from("config/neat_test.yml");
         let test_config = read_config_yaml(yaml).unwrap();
         assert_eq!(test_config.reference, "data/ecoli.fa".to_string());
-        assert_eq!(test_config.coverage, 10);
+        assert_eq!(test_config.coverage, 3);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_bad_yaml() {
+        let yaml = String::from("fake_file.yml");
+        read_config_yaml(yaml).unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_missing_ref() {
+        let yaml = String::from("config/simple_template.yml");
+        let _test_config = read_config_yaml(yaml).unwrap();
     }
 
     #[test]
@@ -454,8 +469,8 @@ mod tests {
         assert_eq!(config.mutation_rate, 0.001);
         assert_eq!(config.ploidy, 2);
         assert_eq!(config.paired_ended, false);
-        assert_eq!(config.fragment_mean, 300.0);
-        assert_eq!(config.fragment_st_dev, 30.0);
+        assert_eq!(config.fragment_mean, None);
+        assert_eq!(config.fragment_st_dev, None);
         assert_eq!(config.produce_fasta, false);
         assert_eq!(config.produce_fastq, true);
         assert_eq!(config.produce_vcf, false);
@@ -473,8 +488,8 @@ mod tests {
         assert_eq!(config.mutation_rate, 0.111);
         assert_eq!(config.ploidy, 3);
         assert_eq!(config.paired_ended, true);
-        assert_eq!(config.fragment_mean, 111.0);
-        assert_eq!(config.fragment_st_dev, 0.011);
+        assert_eq!(config.fragment_mean, Some(111.0));
+        assert_eq!(config.fragment_st_dev, Some(0.011));
         assert_eq!(config.produce_fasta, true);
         assert_eq!(config.produce_fastq, false);
         assert_eq!(config.produce_vcf, true);
@@ -497,4 +512,91 @@ mod tests {
         let test_config = build_config_from_args(args).unwrap();
         assert_eq!(test_config.reference, "data/ecoli.fa".to_string())
     }
+
+    #[test]
+    #[should_panic]
+    fn test_cl_missing_ref() {
+        let args: Cli = Cli{
+            config: String::new(),
+            reference: String::from(""),
+            output_dir: String::from("test_dir"),
+            log_level: String::from("Trace"),
+            log_dest: String::new(),
+            output_file_prefix: String::from("test"),
+            read_length: 150,
+            coverage: 10,
+        };
+
+        build_config_from_args(args).unwrap();
+    }
+
+    #[test]
+    fn test_overwrite_warn() {
+        let mut config = ConfigBuilder::new();
+        config = config.set_overwrite_output();
+        config.check_and_print_config().unwrap();
+    }
+
+    #[test]
+    fn test_produce_fastq_messages() {
+        let mut config = ConfigBuilder::new();
+        config = config.set_paired_ended(true);
+        config = config.set_fragment_mean(100.0);
+        config = config.set_fragment_st_dev(10.0);
+        // tests first branch of if statement for paired_ended & produce_fastq = true
+        config.check_and_print_config().unwrap();
+        // Checks the alternative pe = true, produce_fastq = false
+        config = config.set_produce_fastq(false);
+        // need to produce at least one file or check will panic
+        config = config.set_produce_fasta(true);
+        config.check_and_print_config().unwrap();
+    }
+
+    #[test]
+    fn test_produce_fasta_messages() {
+        let mut config = ConfigBuilder::new();
+        config = config.set_produce_fasta(true);
+        config.check_and_print_config().unwrap();
+        config = config.set_produce_vcf(true);
+        config.check_and_print_config().unwrap();
+        config = config.set_produce_bam(true);
+        config.check_and_print_config().unwrap();
+        // If the unwraps all work we're good.
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_no_files() {
+        let mut config = ConfigBuilder::new();
+        config = config.set_produce_fastq(false);
+        config.check_and_print_config().unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_no_frag_mean_or_stdev() {
+        let mut config = ConfigBuilder::new();
+        // paired end set to true, by default, fragment mean and st dev are None
+        config = config.set_paired_ended(true);
+        config.check_and_print_config().unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_no_frag_mean() {
+        let mut config = ConfigBuilder::new();
+        config = config.set_paired_ended(true);
+        config = config.set_fragment_st_dev(10.0);
+        config.check_and_print_config().unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_no_stdev() {
+        let mut config = ConfigBuilder::new();
+        config = config.set_paired_ended(true);
+        config = config.set_fragment_mean(100.0);
+        config.check_and_print_config().unwrap();
+    }
+
 }
\ No newline at end of file
diff --git a/src/utils/fasta_tools.rs b/src/utils/fasta_tools.rs
index 5415bb5..3aebfcb 100644
--- a/src/utils/fasta_tools.rs
+++ b/src/utils/fasta_tools.rs
@@ -128,42 +128,53 @@ pub fn write_fasta(
     Ok(())
 }
 
-#[test]
-fn test_conversions() {
-    let initial_sequence = "AAAANNNNGGGGCCCCTTTTAAAA";
-    let test_map: Vec<u8> = vec![0,0,0,0,4,4,4,4,2,2,2,2,1,1,1,1,3,3,3,3,0,0,0,0];
-    let remap: Vec<u8> = initial_sequence.chars().map(|x| char_to_num(x)).collect();
-    assert_eq!(remap, test_map);
-    assert_eq!(sequence_array_to_string(&test_map), initial_sequence);
-}
+#[cfg(test)]
+mod tests {
+    use super::*;
 
-#[test]
-fn test_read_fasta() {
-    let test_fasta = "data/H1N1.fa";
-    let (_test_map, map_order) = read_fasta(test_fasta);
-    assert_eq!(map_order[0], "H1N1_HA".to_string())
-}
+    #[test]
+    fn test_conversions() {
+        let initial_sequence = "AAAANNNNGGGGCCCCTTTTAAAA";
+        let test_map: Vec<u8> = vec![0, 0, 0, 0, 4, 4, 4, 4, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 3, 0, 0, 0, 0];
+        let remap: Vec<u8> = initial_sequence.chars().map(|x| char_to_num(x)).collect();
+        assert_eq!(remap, test_map);
+        assert_eq!(sequence_array_to_string(&test_map), initial_sequence);
+    }
+
+    #[test]
+    fn test_read_fasta() {
+        let test_fasta = "data/H1N1.fa";
+        let (_test_map, map_order) = read_fasta(test_fasta);
+        assert_eq!(map_order[0], "H1N1_HA".to_string())
+    }
 
-#[test]
-fn test_write_fasta() -> Result<(), Box<dyn error::Error>> {
+    #[test]
+    #[should_panic]
+    fn test_read_bad_fasta() {
+        let test_fasta = "data/fake.fasta";
+        read_fasta(test_fasta);
+    }
 
-    let seq1: Vec<u8> = vec![0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1];
-    let fasta_output: HashMap<String, Vec<u8>> = HashMap::from([
-        (String::from("H1N1_HA"), seq1)
-    ]);
-    let fasta_pointer = Box::new(fasta_output);
-    let fasta_order = vec![String::from("H1N1_HA")];
-    let output_file = "test";
-    let test_write = write_fasta(
-        &fasta_pointer,
-        &fasta_order,
-        true,
-        output_file
-    );
-    let file_name = "test.fasta";
-    assert_eq!(test_write.unwrap(), ());
-    let attr = fs::metadata(file_name).unwrap();
-    assert!(attr.len() > 0);
-    fs::remove_file(file_name)?;
-    Ok(())
+    #[test]
+    fn test_write_fasta() -> Result<(), Box<dyn error::Error>> {
+        let seq1: Vec<u8> = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1];
+        let fasta_output: HashMap<String, Vec<u8>> = HashMap::from([
+            (String::from("H1N1_HA"), seq1)
+        ]);
+        let fasta_pointer = Box::new(fasta_output);
+        let fasta_order = vec![String::from("H1N1_HA")];
+        let output_file = "test";
+        let test_write = write_fasta(
+            &fasta_pointer,
+            &fasta_order,
+            true,
+            output_file
+        );
+        let file_name = "test.fasta";
+        assert_eq!(test_write.unwrap(), ());
+        let attr = fs::metadata(file_name).unwrap();
+        assert!(attr.len() > 0);
+        fs::remove_file(file_name)?;
+        Ok(())
+    }
 }
\ No newline at end of file
diff --git a/src/utils/fastq_tools.rs b/src/utils/fastq_tools.rs
index 4d82443..49ab66e 100644
--- a/src/utils/fastq_tools.rs
+++ b/src/utils/fastq_tools.rs
@@ -87,4 +87,76 @@ pub fn write_fastq(
         fs::remove_file(filename2)?;
     }
     Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::Path;
+
+    #[test]
+    fn test_complement() {
+        let nuc1 = 0;
+        let nuc2 = 1;
+        let nuc3 = 2;
+        let nuc4 = 3;
+        let nuc5 = 4;
+
+        assert_eq!(complement(nuc1), 3);
+        assert_eq!(complement(nuc2), 2);
+        assert_eq!(complement(nuc3), 1);
+        assert_eq!(complement(nuc4), 0);
+        assert_eq!(complement(nuc5), 4);
+    }
+
+    #[test]
+    fn test_reverse_complement() {
+        let read: Vec<u8> = vec![0, 0, 0, 0, 1, 1, 1, 1];
+        let revcomp: Vec<u8> = vec![2, 2, 2, 2, 3, 3, 3, 3];
+        assert_eq!(reverse_complement(&read), revcomp);
+    }
+
+    #[test]
+    fn test_write_fastq_single() {
+        let fastq_filename = "test_single";
+        let overwrite_output = true;
+        let paired_ended = false;
+        let seq1 = vec![0, 0, 0, 0, 1, 1, 1, 1];
+        let seq2 = vec![2, 2, 2, 2, 3, 3, 3, 3,];
+        let dataset = vec![&seq1, &seq2];
+        write_fastq(
+            fastq_filename,
+            overwrite_output,
+            paired_ended,
+            dataset
+        ).unwrap();
+        let outfile1 = Path::new("test_single_r1.fastq");
+        let outfile2 = Path::new("test_single_r2.fastq");
+        assert!(outfile1.exists());
+        assert!(!outfile2.exists());
+        fs::remove_file(outfile1).unwrap();
+    }
+
+    #[test]
+    fn test_write_fastq_paired() {
+        let fastq_filename = "test_paired";
+        // might as well test the o_o function as well
+        let overwrite_output = false;
+        let paired_ended = true;
+        let seq1 = vec![0, 0, 0, 0, 1, 1, 1, 1];
+        let seq2 = vec![2, 2, 2, 2, 3, 3, 3, 3,];
+        let dataset = vec![&seq1, &seq2];
+        write_fastq(
+            fastq_filename,
+            overwrite_output,
+            paired_ended,
+            dataset
+        ).unwrap();
+        let outfile1 = Path::new("test_paired_r1.fastq");
+        let outfile2 = Path::new("test_paired_r2.fastq");
+        assert!(outfile1.exists());
+        assert!(outfile2.exists());
+        fs::remove_file(outfile1).unwrap();
+        fs::remove_file(outfile2).unwrap();
+    }
 }
\ No newline at end of file
diff --git a/src/utils/file_tools.rs b/src/utils/file_tools.rs
index cc950fb..3aa2523 100644
--- a/src/utils/file_tools.rs
+++ b/src/utils/file_tools.rs
@@ -2,7 +2,6 @@ use std::fs::File;
 use std::io;
 use std::io::BufRead;
 use std::path::Path;
-use log::error;
 
 pub fn read_lines(filename: &str) -> io::Result<io::Lines<io::BufReader<File>>> {
     // This creates a buffer to read lines
@@ -18,13 +17,28 @@ pub fn open_file(mut filename: &mut str, overwrite_file: bool) -> File {
     }
 }
 
-pub fn check_parent_and_create(filename: &str) -> io::Result<&Path> {
-    // checks that the parent dir exists and then if so creates the file object open
+pub fn check_parent(filename: &str) -> io::Result<&Path> {
+    // checks that the parent dir exists and then if so creates the Path object open
     // and ready to write
     let file_path = Path::new(filename);
-    if !file_path.parent().unwrap().exists() {
-        error!("Path to log file not found!");
-        assert!(file_path.parent().unwrap().exists())
-    }
+    assert!(file_path.parent().unwrap().exists());
     Ok(file_path)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_check_parent() {
+        let filename = "data/H1N1.fa";
+        check_parent(filename).unwrap();
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_check_parent_fail() {
+        let filename = "fake/test.fa";
+        check_parent(filename).unwrap();
+    }
 }
\ No newline at end of file
diff --git a/src/utils/make_reads.rs b/src/utils/make_reads.rs
index a4aba9a..a9aecc7 100644
--- a/src/utils/make_reads.rs
+++ b/src/utils/make_reads.rs
@@ -101,8 +101,8 @@ pub fn generate_reads(
     read_length: &usize,
     coverage: &usize,
     paired_ended: bool,
-    mean: f64,
-    st_dev: f64,
+    mean: Option<f64>,
+    st_dev: Option<f64>,
     mut rng: &mut ThreadRng,
 ) -> Box<HashSet<Vec<u8>>> {
     /*
@@ -122,7 +122,7 @@ pub fn generate_reads(
     let mut fragment_pool: Vec<usize> = Vec::new();
     if paired_ended {
         let num_frags = (mutated_sequence.len() / read_length) * (coverage * 2);
-        let fragment_distribution = Normal::new(mean, st_dev).unwrap();
+        let fragment_distribution = Normal::new(mean.unwrap(), st_dev.unwrap()).unwrap();
         // add fragments to the fragment pool
         for _ in 0..num_frags {
             let frag = fragment_distribution.sample(&mut rng).round() as usize;
@@ -149,4 +149,33 @@ pub fn generate_reads(
     }
     // puts the reads in the heap.
     Box::new(read_set)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::thread_rng;
+
+    #[test]
+    fn test_cover_dataset() {
+        let span_length = 100;
+        let read_length = 10;
+        let fragment_pool = vec![10];
+        let coverage = 1;
+        let mut rng = thread_rng();
+
+        let cover = cover_dataset(
+            span_length,
+            read_length,
+            fragment_pool,
+            coverage,
+            &mut rng,
+        );
+        assert_eq!(cover[0], (0,10))
+    }
+
+    #[test]
+    fn test_generate_reads() {
+        todo!()
+    }
 }
\ No newline at end of file

From 685ad32c231fd555be5e48ab0ea40f07dd73ae06 Mon Sep 17 00:00:00 2001
From: Joshua Allen <jallen17@illinois.edu>
Date: Wed, 20 Mar 2024 22:56:34 -0500
Subject: [PATCH 2/2] Added extensive tests. Coverage is at 76% for the entire
 file, and 86% for the libraries. But, some of the 'uncovered code' is
 actually covered. May add more tests later.

---
 Cargo.toml               |   2 +-
 config/neat_test.yml     |   2 +-
 src/main.rs              |  81 ++---------------
 src/utils.rs             |   5 +-
 src/utils/config.rs      |   3 +-
 src/utils/fasta_tools.rs |  52 ++---------
 src/utils/fastq_tools.rs |   6 +-
 src/utils/file_tools.rs  |   8 +-
 src/utils/make_reads.rs  |  61 ++++++++++++-
 src/utils/mutate.rs      | 134 ++++++++++++++--------------
 src/utils/nucleotides.rs | 184 +++++++++++++++++++++++++++++++++++++++
 src/utils/runner.rs      | 122 ++++++++++++++++++++++++++
 src/utils/vcf_tools.rs   |  58 ++++++++++--
 13 files changed, 509 insertions(+), 209 deletions(-)
 create mode 100644 src/utils/nucleotides.rs
 create mode 100644 src/utils/runner.rs

diff --git a/Cargo.toml b/Cargo.toml
index 10feb57..2f22a48 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,4 +13,4 @@ serde_yaml = "0.9.32"
 clap = { version = "4.5.1", features = ["derive"] }
 itertools = "0.12.1"
 assert_fs = "1.1.1"
-rand_distr = "0.5.0-alpha.0"
\ No newline at end of file
+rand_distr = "0.5.0-alpha.0"
diff --git a/config/neat_test.yml b/config/neat_test.yml
index 482b93e..f14b3f5 100644
--- a/config/neat_test.yml
+++ b/config/neat_test.yml
@@ -13,5 +13,5 @@ produce_fasta: true
 produce_fastq: false
 
 overwrite_output: true
-output_dir: /home/joshfactorial/code/neat_output
+output_dir: .
 output_prefix: testing
diff --git a/src/main.rs b/src/main.rs
index f475e4f..74d8512 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,24 +8,19 @@ extern crate itertools;
 
 mod utils;
 
-use std::collections::{HashMap, HashSet};
+use std::collections::HashMap;
 use std::fs::File;
 use clap::{Parser};
 use log::*;
 use simplelog::*;
 use rand::thread_rng;
-use rand::prelude::*;
 
 use utils::cli;
-use utils::fasta_tools::{read_fasta, write_fasta};
 use utils::config::{read_config_yaml, build_config_from_args};
-use utils::mutate::mutate_fasta;
-use utils::make_reads::generate_reads;
-use utils::fastq_tools::write_fastq;
 use utils::file_tools::check_parent;
-use utils::vcf_tools::write_vcf;
+use utils::runner::run_neat;
 
-fn main() {
+fn main() -> Result<(), std::fmt::Error> {
 
     info!("Begin processing");
     // parse the arguments from the command line
@@ -77,73 +72,7 @@ fn main() {
         Ok(build_config_from_args(args).expect("Problem reading configuration yaml file"))
     }.unwrap();
 
-    // Create the prefix of the files to write
-    let output_file = format!("{}/{}", config.output_dir, config.output_prefix);
-
-    // Reading the reference file into memory
-    info!("Mapping reference fasta file: {}", &config.reference);
-    let (fasta_map, fasta_order) = read_fasta(&config.reference);
-
-    // Mutating the reference and recording the variant locations.
-    info!("Mutating reference.");
-    let (mutated_map, variant_locations) = mutate_fasta(
-        &fasta_map,
-        &mut rng
-    );
-
-    if config.produce_fasta {
-        info!("Outputting fasta file");
-        write_fasta(
-            &mutated_map,
-            &fasta_order,
-            config.overwrite_output,
-            &output_file,
-        ).expect("Problem writing fasta file");
-    }
-
-    if config.produce_vcf {
-        info!("Writing vcf file");
-        write_vcf(
-            &variant_locations,
-            &fasta_order,
-            config.ploidy,
-            &config.reference,
-            config.overwrite_output,
-            &output_file,
-            &mut rng).expect("Error writing vcf file")
-    }
-
-    let mut read_sets: HashSet<Vec<u8>> = HashSet::new();
-    for (_name, sequence) in mutated_map.iter() {
-        // defined as a set of read sequences that should cover
-        // the mutated sequence `coverage` number of times
-        let data_set = generate_reads(
-            sequence,
-            &config.read_len,
-            &config.coverage,
-            config.paired_ended,
-            config.fragment_mean,
-            config.fragment_st_dev,
-            &mut rng
-        );
-
-        read_sets.extend(*data_set);
-    }
-
-    if config.produce_fastq {
-        info!("Shuffling output fastq data");
-        let mut outsets: Box<Vec<&Vec<u8>>> = Box::new(read_sets.iter().collect());
-        outsets.shuffle(&mut rng);
-        
-        info!("Writing fastq");
-        write_fastq(
-            &output_file,
-            config.overwrite_output,
-            config.paired_ended,
-            *outsets,
-        ).expect("Problem writing fastq file");
-        info!("Processing complete")
-    }
-
+    run_neat(config, &mut rng).unwrap();
+    Ok(())
 }
 
diff --git a/src/utils.rs b/src/utils.rs
index 04d2f1f..ff415ec 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -5,4 +5,7 @@ pub mod cli;
 pub mod make_reads;
 pub mod mutate;
 pub mod fastq_tools;
-pub mod vcf_tools;
\ No newline at end of file
+pub mod vcf_tools;
+pub mod nucleotides;
+
+pub mod runner;
\ No newline at end of file
diff --git a/src/utils/config.rs b/src/utils/config.rs
index 5a2d1ff..ff637b6 100644
--- a/src/utils/config.rs
+++ b/src/utils/config.rs
@@ -4,6 +4,7 @@ use std::collections::{HashMap};
 use std::string::String;
 use crate::utils::cli::Cli;
 use log::{debug, warn, error};
+use std::env;
 
 #[derive(Debug)]
 pub struct RunConfiguration {
@@ -99,7 +100,7 @@ impl ConfigBuilder {
             produce_vcf: false,
             produce_bam: false,
             overwrite_output: false,
-            output_dir: String::from(""),
+            output_dir: String::from(env::current_dir().unwrap().to_str().unwrap()),
             output_prefix: String::from("neat_out"),
         }
     }
diff --git a/src/utils/fasta_tools.rs b/src/utils/fasta_tools.rs
index 3aebfcb..a2504ce 100644
--- a/src/utils/fasta_tools.rs
+++ b/src/utils/fasta_tools.rs
@@ -1,52 +1,17 @@
-extern crate rand;
-extern crate log;
-extern crate assert_fs;
-
-use crate::utils::file_tools::read_lines;
-use self::log::info;
+use log::info;
 use std::io;
 use std::io::Write;
 use std::*;
 use HashMap;
+use utils::file_tools::read_lines;
 use utils::file_tools::open_file;
-
-pub fn char_to_num(char_of_interest: char) -> u8 {
-    return match char_of_interest {
-        'A' | 'a' => 0,
-        'C' | 'c' => 1,
-        'G' | 'g' => 2,
-        'T' | 't' => 3,
-        _ => 4
-    }
-}
-
-pub fn num_to_char(nuc_num: u8) -> &'static str {
-    return match nuc_num {
-        0 => "A",
-        1 => "C",
-        2 => "G",
-        3 => "T",
-        _ => "N",
-    }
-}
-
-#[repr(u8)]
-#[allow(unused)]
-pub enum NucRepr {
-    // I just discovered this repr type thing and need to investigate.
-    // todo figure out if we can use this idea to simplify things
-    A = 0,
-    C = 1,
-    G = 2,
-    T = 3,
-    N = 4,
-}
+use utils::nucleotides::{u8_to_base, base_to_u8};
 
 pub fn sequence_array_to_string(input_array: &Vec<u8>) -> String {
     // Converts a sequence vector into a string representing the DNA sequence
     let mut return_string = String::new();
     for num in input_array {
-        return_string += num_to_char(*num);
+        return_string += &(u8_to_base(*num).to_string());
     }
     return_string
 }
@@ -74,7 +39,7 @@ pub fn read_fasta(
                 temp_seq = vec![];
             } else {
                 for char in l.chars() {
-                    temp_seq.push(char_to_num(char));
+                    temp_seq.push(base_to_u8(char));
                 }
             }
         },
@@ -102,7 +67,8 @@ pub fn write_fasta(
      */
     // writing fasta output to files
     let mut output_fasta = format!("{}.fasta", output_file);
-    let mut outfile = open_file(&mut output_fasta, overwrite_output);
+    let mut outfile = open_file(&mut output_fasta, overwrite_output)
+        .expect(&format!("Error opening {}", output_fasta));
     for contig in fasta_order {
         let sequence = &fasta_output[contig];
         // Write contig name
@@ -119,7 +85,7 @@ pub fn write_fasta(
                 max = this_length
             }
             for j in 0..max {
-                line += num_to_char(sequence_to_write[i + j]);
+                line += &(u8_to_base(sequence_to_write[i + j]).to_string());
             }
             writeln!(&mut outfile, "{}", line)?;
             i += 70;
@@ -136,7 +102,7 @@ mod tests {
     fn test_conversions() {
         let initial_sequence = "AAAANNNNGGGGCCCCTTTTAAAA";
         let test_map: Vec<u8> = vec![0, 0, 0, 0, 4, 4, 4, 4, 2, 2, 2, 2, 1, 1, 1, 1, 3, 3, 3, 3, 0, 0, 0, 0];
-        let remap: Vec<u8> = initial_sequence.chars().map(|x| char_to_num(x)).collect();
+        let remap: Vec<u8> = initial_sequence.chars().map(|x| base_to_u8(x)).collect();
         assert_eq!(remap, test_map);
         assert_eq!(sequence_array_to_string(&test_map), initial_sequence);
     }
diff --git a/src/utils/fastq_tools.rs b/src/utils/fastq_tools.rs
index 49ab66e..7be5d1e 100644
--- a/src/utils/fastq_tools.rs
+++ b/src/utils/fastq_tools.rs
@@ -54,11 +54,13 @@ pub fn write_fastq(
     let name_prefix = "neat_generated_".to_string();
     let mut filename1 = String::from(fastq_filename) + "_r1.fastq";
     // open the file and append lines
-    let mut outfile1 = open_file(&mut filename1, overwrite_output);
+    let mut outfile1 = open_file(&mut filename1, overwrite_output)
+        .expect(&format!("Error opening output {}", filename1));
     // setting up pairend ended reads For single ended reads, this will go unused.
     let mut filename2 = String::from(fastq_filename) + "_r2.fastq";
     // open the second file and append lines
-    let mut outfile2 = open_file(&mut filename2, overwrite_output);
+    let mut outfile2 = open_file(&mut filename2, overwrite_output)
+        .expect(&format!("Error opening output {}", filename2));
     // write out sequence by sequence using index to track the numbering
     let mut index = 1;
     for sequence in dataset {
diff --git a/src/utils/file_tools.rs b/src/utils/file_tools.rs
index 3aa2523..550fa08 100644
--- a/src/utils/file_tools.rs
+++ b/src/utils/file_tools.rs
@@ -1,6 +1,6 @@
 use std::fs::File;
 use std::io;
-use std::io::BufRead;
+use std::io::{BufRead, Error};
 use std::path::Path;
 
 pub fn read_lines(filename: &str) -> io::Result<io::Lines<io::BufReader<File>>> {
@@ -9,11 +9,11 @@ pub fn read_lines(filename: &str) -> io::Result<io::Lines<io::BufReader<File>>>
     Ok(io::BufReader::new(file).lines())
 }
 
-pub fn open_file(mut filename: &mut str, overwrite_file: bool) -> File {
+pub fn open_file(mut filename: &mut str, overwrite_file: bool) -> Result<File, Error> {
     if overwrite_file {
-        File::options().create(true).write(true).open(&mut filename).unwrap()
+        File::options().create(true).write(true).open(&mut filename)
     } else {
-        File::options().create_new(true).append(true).open(&mut filename).unwrap()
+        File::options().create_new(true).append(true).open(&mut filename)
     }
 }
 
diff --git a/src/utils/make_reads.rs b/src/utils/make_reads.rs
index a9aecc7..88f9278 100644
--- a/src/utils/make_reads.rs
+++ b/src/utils/make_reads.rs
@@ -175,7 +175,64 @@ mod tests {
     }
 
     #[test]
-    fn test_generate_reads() {
-        todo!()
+    fn test_gap_function() {
+        let span_length = 100_000;
+        let read_length = 100;
+        let fragment_pool = vec![300];
+        let coverage = 1;
+        let mut rng = thread_rng();
+
+        let cover = cover_dataset(
+            span_length,
+            read_length,
+            fragment_pool,
+            coverage,
+            &mut rng,
+        );
+        assert_eq!(cover[0], (0, 300))
+    }
+
+    #[test]
+    fn test_generate_reads_single() {
+        let mutated_sequence = vec![0, 0, 1, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 2, 2, 2, 4, 4, 4, 4];
+        let read_length = 10;
+        let coverage = 1;
+        let paired_ended = false;
+        let mean = None;
+        let st_dev = None;
+        let mut rng = thread_rng();
+        let reads = generate_reads(
+            &mutated_sequence,
+            &read_length,
+            &coverage,
+            paired_ended,
+            mean,
+            st_dev,
+            &mut rng,
+        );
+        println!("{:?}", reads);
+        assert!(reads.contains(&(vec![0, 0, 1, 0, 3, 3, 3, 3, 0, 0])));
+    }
+
+    #[test]
+    fn test_generate_reads_paired() {
+        let mutated_sequence: Vec<u8> = std::iter::repeat(1).take(100_000).collect();
+        let read_length = 100;
+        let coverage = 1;
+        let paired_ended = true;
+        let mean = Some(200.0);
+        let st_dev = Some(1.0);
+        let mut rng = thread_rng();
+        let reads = generate_reads(
+            &mutated_sequence,
+            &read_length,
+            &coverage,
+            paired_ended,
+            mean,
+            st_dev,
+            &mut rng,
+        );
+        println!("{:?}", reads);
+        assert!(!reads.is_empty())
     }
 }
\ No newline at end of file
diff --git a/src/utils/mutate.rs b/src/utils/mutate.rs
index 20894ca..6084cbc 100644
--- a/src/utils/mutate.rs
+++ b/src/utils/mutate.rs
@@ -1,53 +1,10 @@
 use std::cmp::max;
 use std::collections::HashMap;
 use rand::prelude::{ThreadRng, IndexedRandom};
-use rand::seq::SliceRandom;
 use rand::Rng;
 use log::debug;
 use itertools::izip;
-
-#[derive(Debug, Ord, PartialOrd, Eq, PartialEq)]
-struct NucModel {
-    // This simple nucleotide model simple tracks the base to mutate from
-    // and the weights of each are turned into a vector. For example, if the weight count for "A"
-    // is 8, then the vector will be [0, 0, 0, 0, 0, 0, 0, 0].
-    // 0: A, 1: C, 2: G, 3: T
-    base: u8,
-    // vector of 0s, the length of which is the weight of the A in the vector.
-    a: Vec<u8>,
-    // vector of 1s, the length of which is the weight of the C in the vector.
-    c: Vec<u8>,
-    // vector of 2s, the length of which is the weight of the G in the vector.
-    g: Vec<u8>,
-    // vector of 3s, the length of which is the weight of the T in the vector.
-    t: Vec<u8>,
-}
-
-impl NucModel {
-    fn from(base: u8, weights: Vec<usize>) -> Self {
-        Self {
-            base,
-            a: vec![0; weights[0]],
-            c: vec![1; weights[1]],
-            g: vec![2; weights[2]],
-            t: vec![3; weights[3]],
-        }
-    }
-
-    fn choose_new_nuc(&self, mut rng: &mut ThreadRng) -> u8 {
-        // concatenate the vectors together
-        let mut choices = [&self.a[..], &self.c[..], &self.g[..], &self.t[..]].concat();
-        // shuffle the weighted list, based on whatever statistics we come up with.
-        choices.shuffle(&mut rng);
-        // start with the first choice
-        let mut i = 0;
-        // Look for the first nuc that isn't the old nuc.
-        while choices[i] == self.base {
-            i += 1;
-        }
-        choices[i]
-    }
-}
+use utils::nucleotides::NucModel;
 
 pub fn mutate_fasta(
     file_struct: &HashMap<String, Vec<u8>>,
@@ -99,7 +56,7 @@ pub fn mutate_fasta(
         let num_positions = max(1, rough_num_positions.round() as usize);
 
         (mutated_record, contig_mutations) = mutate_sequence(
-            mutated_record, num_positions, &mut rng
+            &mutated_record, num_positions, &mut rng
         );
         // Add to the return struct and variants map.
         return_struct.entry(name.clone()).or_insert(mutated_record.clone());
@@ -110,7 +67,7 @@ pub fn mutate_fasta(
 }
 
 fn mutate_sequence(
-    sequence: Vec<u8>,
+    sequence: &Vec<u8>,
     num_positions: usize,
     mut rng: &mut ThreadRng
 ) -> (Vec<u8>, Vec<(usize, u8, u8)>) {
@@ -134,44 +91,83 @@ fn mutate_sequence(
     let weights = vec![1; mutated_record.len()];
     // zip together weights and positions
     let mut weighted_positions: Vec<(usize, i32)> = Vec::new();
+    // find all non n positions.
+    let non_n_positions: Vec<usize> = mutated_record
+        .iter()
+        .enumerate()
+        .filter(|&(_, y)| *y != 4u8)
+        .map(|(x, _)| x)
+        .collect();
     // izip!() accepts iterators and/or values with IntoIterator.
-    for (x, y) in izip!(&mut (0..mutated_record.len()), &weights) {
-        weighted_positions.push((x, *y))
+    for (x, y) in izip!(&non_n_positions, &weights) {
+        weighted_positions.push((*x, *y))
     }
     // now choose a random selection of num_positions without replacement
     let mutated_elements: Vec<&(usize, i32)> = weighted_positions
         .choose_multiple_weighted(&mut rng, num_positions, |x| x.1)
         .unwrap()
         .collect::<Vec<_>>();
-    // Build mutation model
-    /*
-    Using NEAT's original mutation model of
-    [[0.0, 0.15, 0.7, 0.15],
-     [0.15, 0.0, 0.15, 0.7],
-     [0.7, 0.15, 0.0, 0.15],
-     [0.15, 0.7, 0.15, 0.0]]
-
-     multiply each value by 20, which produces integer values
-     */
-    let mut_a = NucModel::from(0, vec![0, 3, 14, 3]);
-    let mut_c = NucModel::from(1, vec![3, 0, 3, 14]);
-    let mut_g = NucModel::from(2, vec![14, 3, 0, 3]);
-    let mut_t = NucModel::from(3, vec![3, 14, 3, 0]);
+    // Build the default mutation model
+    // todo incorporate custom models
+    let nucleotide_mutation_model = NucModel::new();
 
     // Will hold the variants added to this sequence
     let mut sequence_variants: Vec<(usize, u8, u8)> = Vec::new();
     // for each index, picks a new base
     for (index, _weight) in mutated_elements {
+        // Skip the N's
+        if sequence[*index] == 4 {
+            continue
+        }
+        // remeber the reference for later.
         let reference_base = sequence[*index];
-        mutated_record[*index] = match reference_base {
-            0 => mut_a.choose_new_nuc(&mut rng),
-            1 => mut_c.choose_new_nuc(&mut rng),
-            2 => mut_g.choose_new_nuc(&mut rng),
-            3 => mut_t.choose_new_nuc(&mut rng),
-            _ => 4
-        };
+        // pick a new base and assign the position to it.
+        mutated_record[*index] = nucleotide_mutation_model.choose_new_nuc(reference_base, &mut rng);
+        // This check simply ensures that our model actually mutated the base.
+        if mutated_record[*index] == reference_base {
+            panic!("BUG: Mutation model failed to mutate the base. This should not happen.")
+        }
         // add the location and alt base for the variant
         sequence_variants.push((*index, mutated_record[*index], reference_base))
     }
     (mutated_record, sequence_variants)
+}
+
+#[cfg(test)]
+mod tests {
+    use rand::thread_rng;
+    use super::*;
+
+    #[test]
+    fn test_mutate_sequence() {
+        let seq1: Vec<u8> = vec![4, 4, 0, 0, 0, 1, 1, 2, 0, 3, 1, 1, 1];
+        let num_positions = 2;
+        let mut rng = thread_rng();
+        let mutant = mutate_sequence(&seq1, num_positions, &mut rng);
+        assert_eq!(mutant.0.len(), seq1.len());
+        assert!(!mutant.1.is_empty());
+        // N's stay N's
+        assert_eq!(mutant.0[0], 4);
+        assert_eq!(mutant.0[1], 4);
+    }
+
+    #[test]
+    fn test_mutate_fasta() {
+        let seq = vec![4, 4, 0, 0, 0, 1, 1, 2, 0, 3, 1, 1, 1];
+        let file_struct: HashMap<String, Vec<u8>> = HashMap::from([
+                ("chr1".to_string(), seq.clone())
+            ]);
+        let mut rng = thread_rng();
+        let mutations = mutate_fasta(
+            &file_struct,
+            &mut rng,
+        );
+        assert!(mutations.0.contains_key("chr1"));
+        assert!(mutations.1.contains_key("chr1"));
+        let mutation_location = mutations.1["chr1"][0].0;
+        let mutation_alt = mutations.1["chr1"][0].1;
+        let mutation_ref = mutations.1["chr1"][0].2;
+        assert_eq!(mutation_ref, seq[mutation_location]);
+        assert_ne!(mutation_alt, mutation_ref)
+    }
 }
\ No newline at end of file
diff --git a/src/utils/nucleotides.rs b/src/utils/nucleotides.rs
new file mode 100644
index 0000000..ad517b9
--- /dev/null
+++ b/src/utils/nucleotides.rs
@@ -0,0 +1,184 @@
+use rand::prelude::ThreadRng;
+use itertools::izip;
+use rand::seq::IndexedRandom;
+
+/// Throughout this program, we are standardizing the use of a u8 representation of the nucleotides
+///     A = 0
+///     C = 1
+///     G = 2
+///     T = 3
+///     N = 4
+/// This is intended to make it easier to store them. We thought about using the u8 representation
+/// of the character as built into Rust, but we'd then have to figure out the translations and keep
+/// track of extra numbers. So this is intended to simplify everything
+
+pub fn base_to_u8(char_of_interest: char) -> u8 {
+    // This defines the relationship between the 4 possible nucleotides in DNA and
+    // a simple u8 numbering system. Everything that isn't a recognized base is a 4.
+    // Note that NEAT ignores soft masking.
+    return match char_of_interest {
+        'A' | 'a' => 0,
+        'C' | 'c' => 1,
+        'G' | 'g' => 2,
+        'T' | 't' => 3,
+        _ => 4,
+    }
+}
+
+pub fn u8_to_base(nuc_num: u8) -> char {
+    // Canonical conversion from base u8 representation back into the character.
+    // We're returning a string instead of a char to facilitate. No attempt to preserve or display
+    // any soft masking.
+    return match nuc_num {
+        0 => 'A',
+        1 => 'C',
+        2 => 'G',
+        3 => 'T',
+        _ => 'N',
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct NucModel {
+    // Simple mutation model. The letter in the model represents
+    // the base we are mutating and the vector are the weights for mutating
+    // to another base (in the same a, c, g, t order)
+    //
+    // By definition, the model is a 4x4 matrix with zeros along the diagonal
+    // because, e.g., A can't "mutate" to A.
+    a: Vec<usize>,
+    c: Vec<usize>,
+    g: Vec<usize>,
+    t: Vec<usize>,
+}
+
+impl NucModel {
+
+    pub fn new() -> Self {
+        // Default mutation model based on the original from NEAT 2.0
+        Self {
+            a: vec![0, 3, 14, 3],
+            c: vec![3, 0, 3, 14],
+            g: vec![14, 3, 0, 3],
+            t: vec![3, 14, 3, 0],
+        }
+    }
+    pub fn from(weights: Vec<Vec<usize>>) -> Self {
+        // Supply a vector of 4 vectors that define the mutation chance
+        // from the given base to the other 4 bases.
+
+        // First some safety checks. This should be a 4x4 matrix defining mutation from
+        // ACGT (top -> down) to ACGT (left -> right)
+        if weights.len() != 4 {
+            panic!("Weights supplied to NucModel is wrong size");
+        }
+        for weight_vec in &weights {
+            if weight_vec.len() != 4 {
+                panic!("Weights supplied to NucModel is wrong size");
+            }
+        }
+
+        Self {
+            a: weights[0].clone(),
+            c: weights[1].clone(),
+            g: weights[2].clone(),
+            t: weights[3].clone(),
+        }
+    }
+
+    pub fn choose_new_nuc(&self, base: u8, mut rng: &mut ThreadRng) -> u8 {
+        // We'll handle the trivial case first:
+        if base >= 4 {
+            return 4;
+        }
+
+        // the canonical choices for DNA, as defined above
+        let choices: [u8; 4] = [0, 1, 2, 3];
+        // Pick the weights list for the base that was input
+        let weights: Vec<usize> = match base {
+            0 => self.a.clone(),
+            1 => self.c.clone(),
+            2 => self.g.clone(),
+            3 => self.t.clone(),
+            // we filtered out everything 4 and over above
+            _ => { panic!("This line should be unreachable") },
+        };
+
+        let mut weighted_bases: Vec<(u8, usize)> = Vec::new();
+        // This for loop pairs the base with its weight.
+        for (x, y) in izip!(choices, weights) {
+            weighted_bases.push((x, y))
+        }
+
+        // return a new base, based on the selection. Note that the weight for the input base
+        // will be always be zero (by design), so there is no chance of returning the original.
+        weighted_bases
+            .choose_weighted(&mut rng, |x| x.1)
+            .unwrap()
+            .0
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rand::thread_rng;
+    use super::*;
+
+    #[test]
+    fn test_nuc_model_build() {
+        let a_weights = vec![0, 20, 1, 20];
+        let c_weights = vec![20, 0, 1, 1];
+        let g_weights = vec![1, 1, 0, 20];
+        let t_weights = vec![20, 1, 20, 0];
+
+        let model = NucModel {
+            a: a_weights.clone(),
+            c: c_weights.clone(),
+            g: g_weights.clone(),
+            t: t_weights.clone(),
+        };
+
+        let str = format!("{:?}", model);
+        let str_repr = String::from("NucModel { a: [0, 20, 1, 20], c: [20, 0, 1, 1], g: [1, 1, 0, 20], t: [20, 1, 20, 0] }");
+        assert_eq!(str, str_repr);
+        assert_eq!(model.a, a_weights);
+    }
+
+    #[test]
+    fn test_nuc_model_from_weights() {
+        let a_weights = vec![0, 20, 1, 20];
+        let c_weights = vec![20, 0, 1, 1];
+        let g_weights = vec![1, 1, 0, 20];
+        let t_weights = vec![20, 1, 20, 0];
+        let mut rng = thread_rng();
+        let test_model = NucModel::from(vec![a_weights, c_weights, g_weights, t_weights]);
+        // It actually mutates the base
+        assert_ne!(test_model.choose_new_nuc(0, &mut rng), 0);
+        assert_ne!(test_model.choose_new_nuc(1, &mut rng), 1);
+        assert_ne!(test_model.choose_new_nuc(2, &mut rng), 2);
+        assert_ne!(test_model.choose_new_nuc(3, &mut rng), 3);
+        // It gives back N when you give it N
+        assert_eq!(test_model.choose_new_nuc(4, &mut rng), 4);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_nuc_model_too_many_vecs() {
+        let a_weights = vec![0, 20, 1, 20];
+        let c_weights = vec![20, 0, 1, 1];
+        let g_weights = vec![1, 1, 0, 20];
+        let t_weights = vec![20, 1, 20, 0];
+        let u_weights = vec![20, 1, 20, 0];
+        NucModel::from(vec![a_weights, c_weights, g_weights, t_weights, u_weights]);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_nuc_model_too_many_bases() {
+        let a_weights = vec![0, 20, 1, 20, 1];
+        let c_weights = vec![20, 0, 1, 1];
+        let g_weights = vec![1, 1, 0, 20];
+        let t_weights = vec![20, 1, 20, 0];
+        NucModel::from(vec![a_weights, c_weights, g_weights, t_weights]);
+    }
+}
\ No newline at end of file
diff --git a/src/utils/runner.rs b/src/utils/runner.rs
new file mode 100644
index 0000000..8393c89
--- /dev/null
+++ b/src/utils/runner.rs
@@ -0,0 +1,122 @@
+use std::collections::HashSet;
+use std::fmt::{Display, Formatter};
+use log::info;
+use rand::prelude::SliceRandom;
+use rand::rngs::ThreadRng;
+use utils::config::RunConfiguration;
+use utils::fasta_tools::{read_fasta, write_fasta};
+use utils::fastq_tools::write_fastq;
+use utils::make_reads::generate_reads;
+use utils::mutate::mutate_fasta;
+use utils::vcf_tools::write_vcf;
+
+#[derive(Debug)]
+pub enum NeatErrors {
+    FastaError,
+    FastqError,
+    VcfError,
+}
+
+impl Display for NeatErrors {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        todo!()
+    }
+}
+
+pub fn run_neat(config: Box<RunConfiguration>, mut rng: &mut ThreadRng) -> Result<(), NeatErrors>{
+    // Create the prefix of the files to write
+    let output_file = format!("{}/{}", config.output_dir, config.output_prefix);
+
+    // Reading the reference file into memory
+    info!("Mapping reference fasta file: {}", &config.reference);
+    let (fasta_map, fasta_order) = read_fasta(&config.reference);
+
+    // Mutating the reference and recording the variant locations.
+    info!("Mutating reference.");
+    let (mutated_map, variant_locations) = mutate_fasta(
+        &fasta_map,
+        &mut rng
+    );
+
+    if config.produce_fasta {
+        info!("Outputting fasta file");
+        write_fasta(
+            &mutated_map,
+            &fasta_order,
+            config.overwrite_output,
+            &output_file,
+        )
+            .or(Err(NeatErrors::FastaError))
+            .expect("Error writing fasta file");
+    }
+
+    if config.produce_vcf {
+        info!("Writing vcf file");
+        write_vcf(
+            &variant_locations,
+            &fasta_order,
+            config.ploidy,
+            &config.reference,
+            config.overwrite_output,
+            &output_file,
+            &mut rng
+        )
+            .or(Err(NeatErrors::VcfError))
+            .expect("Error writing VCF file");
+    }
+
+    let mut read_sets: HashSet<Vec<u8>> = HashSet::new();
+    for (_name, sequence) in mutated_map.iter() {
+        // defined as a set of read sequences that should cover
+        // the mutated sequence `coverage` number of times
+        let data_set = generate_reads(
+            sequence,
+            &config.read_len,
+            &config.coverage,
+            config.paired_ended,
+            config.fragment_mean,
+            config.fragment_st_dev,
+            &mut rng
+        );
+
+        read_sets.extend(*data_set);
+    }
+
+    if config.produce_fastq {
+        info!("Shuffling output fastq data");
+        let mut outsets: Box<Vec<&Vec<u8>>> = Box::new(read_sets.iter().collect());
+        outsets.shuffle(&mut rng);
+
+        info!("Writing fastq");
+        write_fastq(
+            &output_file,
+            config.overwrite_output,
+            config.paired_ended,
+            *outsets,
+        )
+            .or(Err(NeatErrors::FastqError))
+            .expect("Problem writing fastq file");
+        info!("Processing complete")
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use rand::thread_rng;
+    use std::path::Path;
+    use super::*;
+
+    #[test]
+    fn test_runner() -> Result<(), NeatErrors>{
+        let config = RunConfiguration::build().build();
+        run_neat(
+            Box::new(config),
+            &mut thread_rng(),
+        ).unwrap();
+        let fastq_file = Path::new("neat_out_r1.fastq").canonicalize().unwrap();
+        fs::remove_file(fastq_file).unwrap();
+        Ok(())
+    }
+}
\ No newline at end of file
diff --git a/src/utils/vcf_tools.rs b/src/utils/vcf_tools.rs
index fa0fafc..26b58af 100644
--- a/src/utils/vcf_tools.rs
+++ b/src/utils/vcf_tools.rs
@@ -6,7 +6,7 @@ use std::io::Write;
 use rand::Rng;
 use rand::rngs::ThreadRng;
 use rand::seq::IndexedRandom;
-use utils::fasta_tools::num_to_char;
+use utils::nucleotides::u8_to_base;
 use utils::file_tools::open_file;
 
 fn genotype_to_string(genotype: Vec<usize>) -> String {
@@ -25,7 +25,7 @@ pub fn write_vcf(
     variant_locations: &HashMap<String, Vec<(usize, u8, u8)>>,
     fasta_order: &Vec<String>,
     ploidy: usize,
-    reference: &str,
+    reference_path: &str,
     overwrite_output: bool,
     output_file_prefix: &str,
     mut rng: &mut ThreadRng,
@@ -36,6 +36,7 @@ pub fn write_vcf(
             consisting of a tuple of (position, alt base, ref base).
         fasta_order: A vector of contig names in the order of the reference fasta.
         ploidy: The number of copies of each chromosome present in the organism
+        reference_path: The location of the reference file this vcf is showing variants from.
         output_file_prefix: The path to the directory and the prefix to use for filenames
         rng: A random number generator for this run
     Result:
@@ -45,10 +46,11 @@ pub fn write_vcf(
     let ploid_numbers: Vec<usize> = (1..ploidy+1).collect();
     // set the filename of the output vcf
     let mut filename = format!("{}.vcf", output_file_prefix);
-    let mut outfile = open_file(&mut filename, overwrite_output);
+    let mut outfile = open_file(&mut filename, overwrite_output)
+        .expect(&format!("Problem opening {} for output.", filename));
     // add the vcf header
     writeln!(&mut outfile, "##fileformat=VCFv4.1")?;
-    writeln!(&mut outfile, "##reference={}", reference)?;
+    writeln!(&mut outfile, "##reference={}", reference_path)?;
     writeln!(&mut outfile, "##Generated by rusty-neat")?;
     writeln!(&mut outfile, "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">")?;
     writeln!(&mut outfile, "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">")?;
@@ -87,15 +89,53 @@ pub fn write_vcf(
             // Format the output line. Any fields without data will be a simple period. Quality
             // is set to 37 for all these variants.
             let line = format!("{}\t{}\t.\t{}\t{}\t37\tPASS\t.\tGT\t{}",
-                    contig,
-                    mutation.0 + 1,
-                    num_to_char(mutation.2),
-                    num_to_char(mutation.1),
-                    genotype_to_string(genotype),
+                               contig,
+                               mutation.0 + 1,
+                               u8_to_base(mutation.2),
+                               u8_to_base(mutation.1),
+                               genotype_to_string(genotype),
                 );
 
             writeln!(&mut outfile, "{}", line)?;
         }
     };
     Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fs;
+    use rand::thread_rng;
+    use super::*;
+    use std::path::Path;
+
+    #[test]
+    fn test_genotype_to_string() {
+        let genotype = vec![0, 1, 0];
+        assert_eq!(String::from("0/1/0"), genotype_to_string(genotype));
+    }
+
+    #[test]
+    fn test_write_vcf() {
+        let variant_locations = HashMap::from([
+            ("chr1".to_string(), vec![(3, 0, 1), (7, 1, 2)])
+        ]);
+        let fasta_order = vec!["chr1".to_string()];
+        let ploidy = 2;
+        let reference_path = "/fake/path/to/H1N1.fa";
+        let overwrite_output = false;
+        let output_file_prefix = "test";
+        let mut rng = thread_rng();
+        write_vcf(
+            &variant_locations,
+            &fasta_order,
+            ploidy,
+            reference_path,
+            overwrite_output,
+            output_file_prefix,
+            &mut rng,
+        ).unwrap();
+        assert!(Path::new("test.vcf").exists());
+        fs::remove_file("test.vcf").unwrap();
+    }
 }
\ No newline at end of file