From 3eae33030602a6373901b4aa224a1eca79b15e89 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 11:05:58 +0200 Subject: [PATCH 01/72] Johannes over-simplification --- Cargo.lock | 6 +- Cargo.toml | 2 +- src/main.rs | 267 +++++----------------------------------------------- 3 files changed, 27 insertions(+), 248 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 85ef609..65c85e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -402,9 +402,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] @@ -912,8 +912,8 @@ dependencies = [ "clap", "flate2", "indicatif", + "itertools", "lazy_static", - "regex", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 029c7aa..712d2cb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ edition = "2021" [dependencies] clap = { version = "3.2.10", features = ["derive"] } bio = "0.41.0" -regex = "1.6.0" lazy_static = "1.4" indicatif = "0.17.0" flate2 = "1.0.24" +itertools = "0.10.5" diff --git a/src/main.rs b/src/main.rs index ba77eb6..7ee443c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,24 +1,7 @@ use clap::Parser; -use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; +use itertools::izip; use std::iter::Iterator; -use std::thread; -lazy_static::lazy_static! 
{ -static ref UMI_PATTERN: regex::Regex = regex::Regex::new("^(N{2,})([ATCG]*)$").unwrap(); -} -// Nucleotide pattern for inline transfer -struct Nucleotide { - offset: usize, - spacer: String, -} -// Valid extraction of UMI and read for inline transfer -enum ExtractedRecord { - Empty, - Valid { - read: bio::io::fastq::Record, - umi: Vec, - }, -} // Defining types for simplicity type File = std::fs::File; type Fastq = std::io::BufReader; @@ -40,12 +23,7 @@ impl std::io::Read for ReadFile { } // Enum for the two accepted output formats, '.fastq' and '.fastq.gz' enum OutputFile { - Fastq { - read: bio::io::fastq::Writer, - }, - Gzip { - read: bio::io::fastq::Writer>, - }, + Fastq { read: bio::io::fastq::Writer }, } impl OutputFile { // Implement write for OutputFile enum @@ -55,10 +33,6 @@ impl OutputFile { read.write(header, desc, s.seq(), s.qual()).unwrap(); OutputFile::Fastq { read } } - OutputFile::Gzip { mut read } => { - read.write(header, desc, s.seq(), s.qual()).unwrap(); - OutputFile::Gzip { read } - } } } } @@ -75,28 +49,19 @@ fn read_fastq(path: &str) -> bio::io::fastq::Reader bio::io::fastq::Reader::new(ReadFile::Fastq(std::fs::File::open(path).unwrap())) } } -// Create output files, gzipped optional -fn output_file(name: &str, gz: bool) -> OutputFile { - if gz { - OutputFile::Gzip { - read: std::fs::File::create(format!("{}.fastq.gz", name)) - .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::best())) - .map(bio::io::fastq::Writer::new) - .unwrap(), - } - } else { - OutputFile::Fastq { - read: std::fs::File::create(format!("{}.fastq", name)) - .map(bio::io::fastq::Writer::new) - .unwrap(), - } +// Create output files +fn output_file(name: &str) -> OutputFile { + OutputFile::Fastq { + read: std::fs::File::create(format!("{}.fastq", name)) + .map(bio::io::fastq::Writer::new) + .unwrap(), } } #[derive(clap::Parser)] #[clap( - version = "0.1.0", - author = "Judit Hohenthal", + version = "0.2.0", + author = "Judit Hohenthal, Matthias Zepper, 
Johannes Alneberg", about = "A tool for transfering Unique Molecular Identifiers (UMIs)." )] struct Opts { @@ -126,45 +91,8 @@ struct Opts { \n " )] edit_nr: bool, - #[clap( - long, - help = "Disable gzipped output file (its enabled by default). - \n " - )] - no_gzip: bool, - // Subcommands specifying inline or separate extraction - #[clap(subcommand)] - sub: Commands, -} - -#[derive(clap::Subcommand)] -enum Commands { - #[clap( - name = "separate", - about = "If the UMI reads is in separate fastq file 'separate' must be present in command line. - \nUMI is entered after --ru-in flag. - \nExample input: 'umi-transfer --no-gzip --r1-in 'example_file.fastq.gz separate --ru-in 'example_umi.fastq.gz'' - \n " - )] - Separate { - #[clap(long, required = true)] - ru_in: Vec, - }, - #[clap( - name = "inline", - about = "If the UMI appears inline with the input read files 'inline' must be present in command line. - \n--pattern1 a nucleotide pattern must be available to locate UMI in read file 1 - \n--pattern2 a nucleotide pattern must be available to locate UMI if read file 2 exists - \nExample input: 'umi-transfer --no-gzip --r1-in 'example_file.fastq' inline --pattern1 'NNNNNNNNN' - \n " - )] - Inline { - // Patterns for locating UMI inline, given in Nucleotide pattern - #[clap(long, required = true)] - pattern1: String, - #[clap(long)] - pattern2: Option, - }, + #[clap(long, required = true)] + ru_in: Vec, } // Writes record with properly inserted UMI to Output file @@ -186,77 +114,17 @@ fn write_to_file( output.write(header, s.desc(), s.clone()) } } -// Parses Pattern for Inline extraction -fn parse(pattern: &str) -> Option { - if let Some(captures) = UMI_PATTERN.captures(pattern) { - Some(Nucleotide { - offset: captures.get(1)?.end(), - spacer: captures.get(2)?.as_str().into(), - }) - } else { - panic!("") - } -} -// Extracts UMI from inline record -fn extract(record: bio::io::fastq::Record, pattern: &str) -> ExtractedRecord { - let handler = parse(pattern); - match 
handler { - Some(Nucleotide { offset, spacer }) => { - let end = offset + spacer.len(); - if end <= record.seq().len() && record.seq()[offset..end] == *spacer.as_bytes() { - let read = bio::io::fastq::Record::with_attrs( - record.id(), - record.desc(), - record.seq()[end..record.seq().len()].into(), - record.qual()[end..record.qual().len()].into(), - ); - ExtractedRecord::Valid { - read: read, - umi: record.seq()[0..offset].into(), - } - } else { - ExtractedRecord::Empty - } - } - None => panic!(""), - } -} -// Write inline record to Outputfile -fn write_inline_to_file( - record: ExtractedRecord, - write_file: OutputFile, - second: bool, -) -> OutputFile { - match record { - ExtractedRecord::Empty => panic!("Not Valid UMI/ Record"), - ExtractedRecord::Valid { read, umi } => write_to_file(read, write_file, &umi, second), - } -} fn main() { // Parse commandline arguments let args = Opts::parse(); - // Automatically gzip output file, if --no-gzip flag was included this will be disabled - let mut gzip = true; - if args.no_gzip { - gzip = false; - } // Create write files, not gzipped if --no-gzip flag entered. 
- let mut write_file_r1 = output_file(&format!("{}1", &args.prefix), gzip); + let mut write_file_r1 = output_file(&format!("{}1", &args.prefix)); // Create a record iterator from input file 1 let r1 = read_fastq(&args.r1_in[0]).records(); - // Settings for progress bar - let len = read_fastq(&args.r1_in[0]).records().count(); - let m = MultiProgress::new(); - let style = ProgressStyle::with_template("[{elapsed_precise}] {bar:60} {pos:>7}/{len:7} {msg}") - .unwrap(); - let pb = m.add(ProgressBar::new(len.try_into().unwrap())); - pb.set_style(style.clone()); - let pb2 = m.insert_after(&pb, ProgressBar::new(len.try_into().unwrap())); - pb2.set_style(style); println!("[1/1] Transfering UMI to records..."); // Enables editing id in output file 2 if --edit-nr flag was included @@ -264,108 +132,19 @@ fn main() { if args.edit_nr { edit_nr = true; } - // Match Subcommand - match args.sub { - Commands::Separate { ru_in } => { - // Clone UMI file for second thread - let ru1 = ru_in.clone(); - let handle1 = thread::spawn(move || { - let ru = read_fastq(&ru_in[0]).records(); - // Iterate records in input file and UMI file - for (r1_rec, ru_rec) in r1.zip(ru) { - // Update progress bar - pb.set_message("R1"); - pb.inc(1); - // Write to Output file - write_file_r1 = - write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec.unwrap().seq(), false); - } - pb.finish_with_message("R1 done"); - }); - - // Save thread handler 1 in Vec - let mut l = Vec::new(); - l.push(handle1); - - // If input file 2 exists: - if !&args.r2_in.is_empty() { - let r2 = read_fastq(&args.r2_in[0]).records(); - let mut write_file_r2 = output_file(&format!("{}2", &args.prefix), gzip); - let handle2 = thread::spawn(move || { - let ru = read_fastq(&ru1[0]).records(); - - // Set progressbar to position 0 - pb2.set_position(0); - for (r2_rec, ru_rec) in r2.zip(ru) { - // Update progressbar - pb2.set_message("R2"); - pb2.inc(1); - // Write record to Output file - write_file_r2 = write_to_file( - r2_rec.unwrap(), - 
write_file_r2, - ru_rec.unwrap().seq(), - edit_nr, - ); - } - pb2.finish_with_message("R2 done"); - }); - // Save thread handler 2 in Vec - l.push(handle2); - } else { - // If no recond input file exists, remove second progress bar - MultiProgress::remove(&m, &pb2); - } - // Wait for threads to finish - for i in l { - if !i.is_finished() { - i.join().unwrap(); - } - } - } - Commands::Inline { pattern1, pattern2 } => { - let handle1 = thread::spawn(move || { - // Iterate each record in input file 1 - for r1_rec in r1 { - // Update progress bar - pb.set_message("FASTQ 1"); - pb.inc(1); - // Extract UMI from record and save both - let record1 = extract(r1_rec.unwrap(), &pattern1); + let ru = read_fastq(&args.ru_in[0]).records(); + let r2 = read_fastq(&args.r2_in[0]).records(); + let mut write_file_r2 = output_file(&format!("{}2", &args.prefix)); - // Write record and extracted UMI to output file - write_file_r1 = write_inline_to_file(record1, write_file_r1, false); - } - pb.finish_with_message("FASTQ 1 done"); - }); + // Iterate records in input file and UMI file + for (r1_rec, ru_rec, r2_rec) in izip!(r1, ru, r2) { + let ru_rec2 = ru_rec.unwrap(); // Error "handling" + let ru2 = ru_rec2.clone(); - // Save thread handler 1 to Vec - let mut l = Vec::new(); - l.push(handle1); + // Write to Output file + write_file_r1 = write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec2.seq(), false); - if !&args.r2_in.is_empty() { - let mut write_file_r2 = output_file(&format!("{}2", &args.prefix), gzip); - let r2 = read_fastq(&args.r2_in[0]).records(); - pb2.set_position(0); - let handle2 = thread::spawn(move || { - for r2_rec in r2 { - pb2.set_message("FASTQ 2"); - pb2.inc(1); - let record2 = extract(r2_rec.unwrap(), &(pattern2.as_ref().unwrap())); - write_file_r2 = write_inline_to_file(record2, write_file_r2, false); - } - pb2.finish_with_message("FASTQ 2 done"); - }); - l.push(handle2); - } else { - MultiProgress::remove(&m, &pb2); - } - for i in l { - if !i.is_finished() { - 
i.join().unwrap(); - } - } - } + write_file_r2 = write_to_file(r2_rec.unwrap(), write_file_r2, ru2.seq(), edit_nr); } } From 83c8612c6bbf5c49e885fd93370ad715808da80a Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 16:05:47 +0200 Subject: [PATCH 02/72] Tidying up a bit --- src/main.rs | 60 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/src/main.rs b/src/main.rs index 7ee443c..2e32227 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,6 +12,7 @@ enum ReadFile { Fastq(File), Gzip(Gzip), } + impl std::io::Read for ReadFile { // Implement read for ReadFile enum fn read(&mut self, into: &mut [u8]) -> std::io::Result { @@ -21,10 +22,12 @@ impl std::io::Read for ReadFile { } } } + // Enum for the two accepted output formats, '.fastq' and '.fastq.gz' enum OutputFile { Fastq { read: bio::io::fastq::Writer }, } + impl OutputFile { // Implement write for OutputFile enum fn write(self, header: &str, desc: Option<&str>, s: bio::io::fastq::Record) -> OutputFile { @@ -36,6 +39,7 @@ impl OutputFile { } } } + // Read input file to Reader. Automatically scans if gzipped from .gz suffix fn read_fastq(path: &str) -> bio::io::fastq::Reader> { if path.ends_with(".gz") { @@ -49,6 +53,7 @@ fn read_fastq(path: &str) -> bio::io::fastq::Reader bio::io::fastq::Reader::new(ReadFile::Fastq(std::fs::File::open(path).unwrap())) } } + // Create output files fn output_file(name: &str) -> OutputFile { OutputFile::Fastq { @@ -62,16 +67,22 @@ fn output_file(name: &str) -> OutputFile { #[clap( version = "0.2.0", author = "Judit Hohenthal, Matthias Zepper, Johannes Alneberg", - about = "A tool for transfering Unique Molecular Identifiers (UMIs)." + about = "A tool for transfering Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. 
\n\n" )] struct Opts { #[clap( long, - default_value = "integrated", + default_value = "output", help = "Prefix for output files, omitted flag will result in default value. \n " )] prefix: String, + #[clap( + long, + help = "Automatically change '3' into '2' in sequence header of output file from R3. + \n " + )] + edit_nr: bool, #[clap( long, required = true, @@ -81,17 +92,17 @@ struct Opts { r1_in: Vec, #[clap( long, - help = "Input file 2 with reads. + required = true, + help = "[REQUIRED] Input file 2 with reads. \n " )] r2_in: Vec, #[clap( long, - help = "Automatically change '3' into '2' in header of output file from R3. - \n " + required = true, + help = "[REQUIRED] Input file with UMI. + \n" )] - edit_nr: bool, - #[clap(long, required = true)] ru_in: Vec, } @@ -100,10 +111,10 @@ fn write_to_file( input: bio::io::fastq::Record, output: OutputFile, umi: &[u8], - second: bool, + edit_nr: bool, ) -> OutputFile { let s = input; - if second { + if edit_nr { let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); let mut string = String::from(s.desc().unwrap()); string.replace_range(0..1, "2"); @@ -119,32 +130,31 @@ fn main() { // Parse commandline arguments let args = Opts::parse(); - // Create write files, not gzipped if --no-gzip flag entered. - let mut write_file_r1 = output_file(&format!("{}1", &args.prefix)); - - // Create a record iterator from input file 1 - let r1 = read_fastq(&args.r1_in[0]).records(); - - println!("[1/1] Transfering UMI to records..."); - // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { edit_nr = true; } - let ru = read_fastq(&args.ru_in[0]).records(); + // Create fastq record iterators from input files + let r1 = read_fastq(&args.r1_in[0]).records(); let r2 = read_fastq(&args.r2_in[0]).records(); + let ru = read_fastq(&args.ru_in[0]).records(); + + // Create write files. 
+ let mut write_file_r1 = output_file(&format!("{}1", &args.prefix)); let mut write_file_r2 = output_file(&format!("{}2", &args.prefix)); - // Iterate records in input file and UMI file - for (r1_rec, ru_rec, r2_rec) in izip!(r1, ru, r2) { - let ru_rec2 = ru_rec.unwrap(); // Error "handling" - let ru2 = ru_rec2.clone(); + println!("Transfering UMIs to records..."); - // Write to Output file - write_file_r1 = write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec2.seq(), false); + // Iterate over records in input files + for (r1_rec, ru_rec_res, r2_rec) in izip!(r1, ru, r2) { + let ru_rec = ru_rec_res.unwrap(); + // Write to Output file (never edit nr for R1) + write_file_r1 = write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec.seq(), false); - write_file_r2 = write_to_file(r2_rec.unwrap(), write_file_r2, ru2.seq(), edit_nr); + let ru_rec2 = ru_rec.clone(); + // Write to Output file (edit nr for R2 if --edit-nr flag was included) + write_file_r2 = write_to_file(r2_rec.unwrap(), write_file_r2, ru_rec2.seq(), edit_nr); } } From 3d6a18fce3a49a5bfdfeb9fb1c3eb38ce2c570ad Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 17:03:20 +0200 Subject: [PATCH 03/72] Added back option to gzip output --- src/main.rs | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 2e32227..9fd2e10 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,8 +13,8 @@ enum ReadFile { Gzip(Gzip), } +// Implement read for ReadFile enum impl std::io::Read for ReadFile { - // Implement read for ReadFile enum fn read(&mut self, into: &mut [u8]) -> std::io::Result { match self { ReadFile::Fastq(file) => file.read(into), @@ -25,17 +25,26 @@ impl std::io::Read for ReadFile { // Enum for the two accepted output formats, '.fastq' and '.fastq.gz' enum OutputFile { - Fastq { read: bio::io::fastq::Writer }, + Fastq { + read: bio::io::fastq::Writer, + }, + Gzip { + read: bio::io::fastq::Writer>, + }, } +// 
Implement write for OutputFile enum impl OutputFile { - // Implement write for OutputFile enum fn write(self, header: &str, desc: Option<&str>, s: bio::io::fastq::Record) -> OutputFile { match self { OutputFile::Fastq { mut read } => { read.write(header, desc, s.seq(), s.qual()).unwrap(); OutputFile::Fastq { read } } + OutputFile::Gzip { mut read } => { + read.write(header, desc, s.seq(), s.qual()).unwrap(); + OutputFile::Gzip { read } + } } } } @@ -55,11 +64,20 @@ fn read_fastq(path: &str) -> bio::io::fastq::Reader } // Create output files -fn output_file(name: &str) -> OutputFile { - OutputFile::Fastq { - read: std::fs::File::create(format!("{}.fastq", name)) - .map(bio::io::fastq::Writer::new) - .unwrap(), +fn output_file(name: &str, gz: bool) -> OutputFile { + if gz { + OutputFile::Gzip { + read: std::fs::File::create(format!("{}.fastq.gz", name)) + .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) + .map(bio::io::fastq::Writer::new) + .unwrap(), + } + } else { + OutputFile::Fastq { + read: std::fs::File::create(format!("{}.fastq", name)) + .map(bio::io::fastq::Writer::new) + .unwrap(), + } } } @@ -104,6 +122,12 @@ struct Opts { \n" )] ru_in: Vec, + #[clap( + long, + help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). + \n " + )] + gzip: bool, } // Writes record with properly inserted UMI to Output file @@ -142,8 +166,8 @@ fn main() { let ru = read_fastq(&args.ru_in[0]).records(); // Create write files. 
- let mut write_file_r1 = output_file(&format!("{}1", &args.prefix)); - let mut write_file_r2 = output_file(&format!("{}2", &args.prefix)); + let mut write_file_r1 = output_file(&format!("{}1", &args.prefix), args.gzip); + let mut write_file_r2 = output_file(&format!("{}2", &args.prefix), args.gzip); println!("Transfering UMIs to records..."); From b8620263408c77db506cd65244cad3bcf9d49df8 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 17:10:53 +0200 Subject: [PATCH 04/72] Started adjusting the readme --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index dec76b8..dde59e4 100644 --- a/README.md +++ b/README.md @@ -35,15 +35,17 @@ The tool requires an input as follows: Running the tool can be done by `cargo run --release -- [options] --r1-in 'fastq' `, where the `--release` flag is optional, but will ensure an optimized build.
-### Inline UMI example: +### Inline UMI example `cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R2.fastq' inline --pattern1 'NNNNNNNNN' --pattern2 'NNNNNNNNN'` -### UMI in seperate file example: +### UMI in seperate file example -`cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' separate --ru-in 'R2.fastq'` +```shell +cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' separate --ru-in 'R2.fastq' +``` -### Special flags: +### Special flags > `--edit-nr` This flag will automatically change the '3' in the R3 files record-headers. Its disabled by default. > `--no-gzip` This flag diables automatic compression (.gz) of output files. From bfe147b6c0511ee6e3f265c757b9ada68bcb3257 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 17:14:50 +0200 Subject: [PATCH 05/72] Readme table formatting --- README.md | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index dde59e4..5614ea8 100644 --- a/README.md +++ b/README.md @@ -20,24 +20,26 @@ The tool requires an input as follows: `SUBCOMMANDS: ` -> `inline:` -> -> > | Flag | Required | Description | -> > | ------------ | :------------------------: | -------------------------: | -> > | `--pattern1` | Yes | Nucleotide Pattern for UMI | -> > | `--pattern2` | Needed if `--r2-in` exists | Nucleotide Pattern for UMI | -> -> `separate:` -> -> > | Flag | Required | Description | -> > | --------- | :------: | ---------------------------: | -> > | `--ru-in` | Yes | FASTQ containing UMI records | +`inline:` +| +| Flag | Required | Description | +| ------------ | :------------------------: | -------------------------: | +| `--pattern1` | Yes | Nucleotide Pattern for UMI | +| `--pattern2` | Needed if `--r2-in` exists | Nucleotide Pattern for UMI | +| +| `separate:` +| +| Flag | Required | Description | +| --------- | :------: | ---------------------------: | +| `--ru-in` 
| Yes | FASTQ containing UMI records | Running the tool can be done by `cargo run --release -- [options] --r1-in 'fastq' `, where the `--release` flag is optional, but will ensure an optimized build.
### Inline UMI example -`cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R2.fastq' inline --pattern1 'NNNNNNNNN' --pattern2 'NNNNNNNNN'` +```shell +cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R2.fastq' inline --pattern1 'NNNNNNNNN' --pattern2 'NNNNNNNNN' +``` ### UMI in seperate file example @@ -47,5 +49,5 @@ cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' s ### Special flags -> `--edit-nr` This flag will automatically change the '3' in the R3 files record-headers. Its disabled by default. -> `--no-gzip` This flag diables automatic compression (.gz) of output files. +`--edit-nr` This flag will automatically change the '3' in the R3 files record-headers. Its disabled by default. +`--no-gzip` This flag diables automatic compression (.gz) of output files. From 743ca2d30fddfd3df2645d0dbc4c6a9f97c5d113 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 17:25:48 +0200 Subject: [PATCH 06/72] more readme changes --- README.md | 80 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 5614ea8..dc44f3e 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,53 @@ -# Building +# umi-transfer +A tool for transfering Unique Molecular Identifiers (UMIs). -Go to the directory with the tool and type in `cargo build` . - -# Running +The UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the +header of the first two fastq files. -### Usage +## Installation +TODO +## Usage The tool requires an input as follows: -> `umi-transfer [OPTIONS] `
- -`OPTIONS:` -| Flag | Required | Description | -| ------------- | :-----------: | ----------: | -| `-h`,`--help` | No | Print help information | -| `--prefix` | No, but default will be '`integrated`' | dictates name of output files| -| `--r1-in` | Yes | FASTQ file with reads| -| `--r2-in` | No | FASTQ file with reads | - -`SUBCOMMANDS: ` - -`inline:` -| -| Flag | Required | Description | -| ------------ | :------------------------: | -------------------------: | -| `--pattern1` | Yes | Nucleotide Pattern for UMI | -| `--pattern2` | Needed if `--r2-in` exists | Nucleotide Pattern for UMI | -| -| `separate:` -| -| Flag | Required | Description | -| --------- | :------: | ---------------------------: | -| `--ru-in` | Yes | FASTQ containing UMI records | +```bash +umi-transfer 0.2.0 +Judit Hohenthal, Matthias Zepper, Johannes Alneberg +A tool for transfering Unique Molecular Identifiers (UMIs). + +The UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the +header of the first two fastq files. + + +USAGE: + umi-transfer [OPTIONS] --r1-in --r2-in --ru-in + +OPTIONS: + --edit-nr Automatically change '3' into '2' in sequence header of output file + from R3. + + --gzip Compress output files with gzip. By default turned off to encourage use + of external compression (see Readme). + + -h, --help Print help information + --prefix Prefix for output files, omitted flag will result in default value. + + [default: output] + --r1-in [REQUIRED] Input file 1 with reads. + + + --r2-in [REQUIRED] Input file 2 with reads. + + + --ru-in [REQUIRED] Input file with UMI. + + -V, --version Print version information +``` Running the tool can be done by `cargo run --release -- [options] --r1-in 'fastq' `, where the `--release` flag is optional, but will ensure an optimized build.
-### Inline UMI example -```shell -cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R2.fastq' inline --pattern1 'NNNNNNNNN' --pattern2 'NNNNNNNNN' -``` - -### UMI in seperate file example +### Example ```shell cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' separate --ru-in 'R2.fastq' @@ -51,3 +57,7 @@ cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' s `--edit-nr` This flag will automatically change the '3' in the R3 files record-headers. Its disabled by default. `--no-gzip` This flag diables automatic compression (.gz) of output files. + +## For developers + +Go to the directory with the tool and type in `cargo build` . From 2ff5e620995dede3f7eeff463aa313f1b4ef1c91 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 13 Apr 2023 22:26:08 +0200 Subject: [PATCH 07/72] Wrote the performance guide --- README.md | 77 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index dc44f3e..9161ec2 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,27 @@ The UMIs are given as a fastq file and will be transferred, explaining the name header of the first two fastq files. 
## Installation -TODO + +### Compile from source +Given that you have [rust installed](https://www.rust-lang.org/tools/install) on your computer, download this repo and run +```shell +cargo build --release +``` +That should create an executable `target/release/umi-transfer` that can be placed anywhere in your `$PATH` or be executed directly by specifying its' path: + +```shell +./target/release/umi-transfer --version +umi-transfer 0.2.0 +``` ## Usage -The tool requires an input as follows: +>### Performance Note: +>The decompression and compression used within umi-transfer is single-threaded, so to get the most reads per minute performance, see the [high performance guide](#high-performance-guide) + +The tool requires three fastq files and additionally accepts flags to adjust the behaviour as can be seen from the help message: -```bash +```raw +$ umi-transfer --help umi-transfer 0.2.0 Judit Hohenthal, Matthias Zepper, Johannes Alneberg A tool for transfering Unique Molecular Identifiers (UMIs). @@ -44,20 +59,62 @@ OPTIONS: -V, --version Print version information ``` -Running the tool can be done by `cargo run --release -- [options] --r1-in 'fastq' `, where the `--release` flag is optional, but will ensure an optimized build.
- - ### Example ```shell -cargo run --release -- --prefix 'output' --r1-in 'R1.fastq' --r2-in 'R3.fastq' separate --ru-in 'R2.fastq' +cargo run --release -- --prefix 'output' --edit-nr --r1-in 'R1.fastq' --r2-in 'R3.fastq' --ru-in 'R2.fastq' ``` -### Special flags +### High Performance Guide +If you have more than one thread available on your computer and would like to process the read files as quickly as possible we recommend to use unix FIFOs (First In First Out) to handle decompression and compression of the fastq files. +This can be done as follows, given that you have your input files compressed as `fastq.gz`, first create FIFOs to represent your uncompressed input files: -`--edit-nr` This flag will automatically change the '3' in the R3 files record-headers. Its disabled by default. -`--no-gzip` This flag diables automatic compression (.gz) of output files. +```shell +$ mkfifo read1.fastq +$ mkfifo read2.fastq +$ mkfifo read3.fastq +``` +and then we use `zcat` to decompress our input files and send it to the pipe that the FIFOs represent: +```shell +$ zcat read1.fastq.gz > read1.fastq & +[1] 233387 +$ zcat read2.fastq.gz > read2.fastq & +[2] 233388 +$ zcat read3.fastq.gz > read3.fastq & +[3] 233389 +``` +Note the trailing `&` to leave these processes running in the background. We can inspect the directory with `ls`: +```shell +$ ls -lh +total 1.5K +-rw-rw----. 1 alneberg ngi2016004 4.5G Apr 13 12:18 read1.fastq.gz +-rw-rw----. 1 alneberg ngi2016004 1.1G Apr 13 12:18 read2.fastq.gz +-rw-rw----. 1 alneberg ngi2016004 4.5G Apr 13 12:18 read3.fastq.gz +prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read1.fastq +prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read2.fastq +prw-rw-r--. 
1 alneberg ngi2016004 0 Apr 13 12:46 read3.fastq +``` +We continue to create corresponding FIFOs for the output files (note that the filenames need to match the value given to `--prefix`) +```shell +$ mkfifo output1.fastq +$ mkfifo output2.fastq +$ pigz -p 10 --stdout > output1.fastq.gz < output1.fastq & +[4] 233394 +$ pigz -p 10 --stdout > output2.fastq.gz < output2.fastq & +[5] 233395 +``` +The value `10` is how many threads each of the `pigz` processes is allowed to use. +The optimal value for this depends on several factors and for optimal performance you will have to do some testing on your exact hardware. +We can then run the `umi-transfer` program as follows: +```shell +$ umi-transfer --prefix output --edit-nr --r1-in read1.fastq --r2-in read3.fastq --ru-in read2.fastq +``` + +It's good practice to remove the FIFOs after the program has finished: +```shell +rm read*.fastq output*.fastq +``` ## For developers Go to the directory with the tool and type in `cargo build` . From 532faac0e0cb0c735eb427358017f2fdffb0f998 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Fri, 14 Apr 2023 15:32:31 +0200 Subject: [PATCH 08/72] Version 0.2.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65c85e3..a188cc3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -906,7 +906,7 @@ checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" [[package]] name = "umi-transfer" -version = "0.1.0" +version = "0.2.0" dependencies = [ "bio", "clap", diff --git a/Cargo.toml b/Cargo.toml index 712d2cb..a8c82e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "umi-transfer" -version = "0.1.0" +version = "0.2.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 94bd11f0ca22d8be01c1aff34a70bad5de5f37c3 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Fri, 14 Apr 2023 15:33:02 +0200 Subject: 
[PATCH 09/72] Added background and for developers sections to README --- README.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9161ec2..1a86226 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ A tool for transfering Unique Molecular Identifiers (UMIs). The UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. +## Background +Common demultiplexing softwares return a separate fastq file, usually named `R2`, containing UMIs. +However, common analysis tools does not allow for this and instead requires the UMI to be contained within the header of the two reads in the pair. +This tools performs this transform in an efficient manner and can also conveniently rename the oddly named read-`3` to read-`2` which is probably more widely recognized. ## Installation @@ -116,5 +120,13 @@ It's good practice to remove the FIFOs after the program has finished: rm read*.fastq output*.fastq ``` ## For developers +To make modifications to `umi-transfer`, clone this repository, make your changes and then run the code with +```shell +cargo run -- +``` +or build the executable with +```shell +cargo build --release +``` -Go to the directory with the tool and type in `cargo build` . +Please make sure to activate code formatting by `rust-analyzer`. 
From 86135d8ca291e6a37d46d22a55388feae65dcac1 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Tue, 18 Apr 2023 17:05:06 +0200 Subject: [PATCH 10/72] Use file-format crate to check for gzipped file --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/main.rs | 7 +++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a188cc3..7f6ca32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -301,6 +301,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" +[[package]] +name = "file-format" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fb46518b6034ba6dbc075ca73277d66cbd488c651c2ccc7255c62b00ce48d24" + [[package]] name = "fixedbitset" version = "0.4.2" @@ -910,6 +916,7 @@ version = "0.2.0" dependencies = [ "bio", "clap", + "file-format", "flate2", "indicatif", "itertools", diff --git a/Cargo.toml b/Cargo.toml index a8c82e2..2347d24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ lazy_static = "1.4" indicatif = "0.17.0" flate2 = "1.0.24" itertools = "0.10.5" +file-format = "0.7.0" diff --git a/src/main.rs b/src/main.rs index 9fd2e10..bcafeb1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ use clap::Parser; +use file_format::FileFormat; use itertools::izip; use std::iter::Iterator; @@ -49,9 +50,10 @@ impl OutputFile { } } -// Read input file to Reader. Automatically scans if gzipped from .gz suffix +// Read input file to Reader. 
Automatically scans if gzipped from file-format crate fn read_fastq(path: &str) -> bio::io::fastq::Reader> { - if path.ends_with(".gz") { + let format = FileFormat::from_file(path).unwrap(); + if format == FileFormat::Gzip { bio::io::fastq::Reader::new(ReadFile::Gzip( std::fs::File::open(path) .map(std::io::BufReader::new) @@ -59,6 +61,7 @@ fn read_fastq(path: &str) -> bio::io::fastq::Reader .unwrap(), )) } else { + // If not gzipped, read as plain fastq bio::io::fastq::Reader::new(ReadFile::Fastq(std::fs::File::open(path).unwrap())) } } From b379998f114e61f4b966b67a23c70a65bf623ed0 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Tue, 18 Apr 2023 17:07:26 +0200 Subject: [PATCH 11/72] Added 'written by' to authors list in clap --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index bcafeb1..bc18642 100644 --- a/src/main.rs +++ b/src/main.rs @@ -87,7 +87,7 @@ fn output_file(name: &str, gz: bool) -> OutputFile { #[derive(clap::Parser)] #[clap( version = "0.2.0", - author = "Judit Hohenthal, Matthias Zepper, Johannes Alneberg", + author = "Written by Judit Hohenthal, Matthias Zepper, Johannes Alneberg", about = "A tool for transfering Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. 
\n\n" )] struct Opts { From 6d3fe059e1342bebca49ce241588cf80bf2ca36a Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Tue, 18 Apr 2023 17:34:39 +0200 Subject: [PATCH 12/72] Failed splitting, cannot reach file_io from umi_external --- src/file_io.rs | 102 +++++++++++++++++++++++++++++++++ src/main.rs | 133 +------------------------------------------- src/umi_external.rs | 34 +++++++++++ 3 files changed, 139 insertions(+), 130 deletions(-) create mode 100644 src/file_io.rs create mode 100644 src/umi_external.rs diff --git a/src/file_io.rs b/src/file_io.rs new file mode 100644 index 0000000..484835d --- /dev/null +++ b/src/file_io.rs @@ -0,0 +1,102 @@ +use file_format::FileFormat; + +// Defining types for simplicity +type File = std::fs::File; +type Fastq = std::io::BufReader; +type Gzip = flate2::bufread::MultiGzDecoder; + +// Enum for the two acceptable input file formats: '.fastq' and '.fastq.gz' +pub enum ReadFile { + Fastq(File), + Gzip(Gzip), +} + +// Implement read for ReadFile enum +impl std::io::Read for ReadFile { + fn read(&mut self, into: &mut [u8]) -> std::io::Result { + match self { + ReadFile::Fastq(file) => file.read(into), + ReadFile::Gzip(file) => file.read(into), + } + } +} + +// Enum for the two accepted output formats, '.fastq' and '.fastq.gz' +pub enum OutputFile { + Fastq { + read: bio::io::fastq::Writer, + }, + Gzip { + read: bio::io::fastq::Writer>, + }, +} + +// Implement write for OutputFile enum +impl OutputFile { + pub fn write(self, header: &str, desc: Option<&str>, s: bio::io::fastq::Record) -> OutputFile { + match self { + OutputFile::Fastq { mut read } => { + read.write(header, desc, s.seq(), s.qual()).unwrap(); + OutputFile::Fastq { read } + } + OutputFile::Gzip { mut read } => { + read.write(header, desc, s.seq(), s.qual()).unwrap(); + OutputFile::Gzip { read } + } + } + } +} + +// Read input file to Reader. 
Automatically scans if gzipped from file-format crate +pub fn read_fastq(path: &str) -> bio::io::fastq::Reader> { + let format = FileFormat::from_file(path).unwrap(); + if format == FileFormat::Gzip { + bio::io::fastq::Reader::new(ReadFile::Gzip( + std::fs::File::open(path) + .map(std::io::BufReader::new) + .map(flate2::bufread::MultiGzDecoder::new) + .unwrap(), + )) + } else { + // If not gzipped, read as plain fastq + bio::io::fastq::Reader::new(ReadFile::Fastq(std::fs::File::open(path).unwrap())) + } +} + +// Create output files +pub fn output_file(name: &str, gz: bool) -> OutputFile { + if gz { + OutputFile::Gzip { + read: std::fs::File::create(format!("{}.fastq.gz", name)) + .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) + .map(bio::io::fastq::Writer::new) + .unwrap(), + } + } else { + OutputFile::Fastq { + read: std::fs::File::create(format!("{}.fastq", name)) + .map(bio::io::fastq::Writer::new) + .unwrap(), + } + } +} + +// Writes record with properly inserted UMI to Output file +pub fn write_to_file( + input: bio::io::fastq::Record, + output: OutputFile, + umi: &[u8], + edit_nr: bool, +) -> OutputFile { + let s = input; + if edit_nr { + let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); + let mut string = String::from(s.desc().unwrap()); + string.replace_range(0..1, "2"); + let desc: Option<&str> = Some(&string); + output.write(header, desc, s) + } else { + let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); + output.write(header, s.desc(), s.clone()) + } +} diff --git a/src/main.rs b/src/main.rs index bc18642..b5022d3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,88 +1,7 @@ use clap::Parser; -use file_format::FileFormat; -use itertools::izip; -use std::iter::Iterator; -// Defining types for simplicity -type File = std::fs::File; -type Fastq = std::io::BufReader; -type Gzip = flate2::bufread::MultiGzDecoder; - -// Enum for the two acceptable input file formats: '.fastq' and 
'.fastq.gz' -enum ReadFile { - Fastq(File), - Gzip(Gzip), -} - -// Implement read for ReadFile enum -impl std::io::Read for ReadFile { - fn read(&mut self, into: &mut [u8]) -> std::io::Result { - match self { - ReadFile::Fastq(file) => file.read(into), - ReadFile::Gzip(file) => file.read(into), - } - } -} - -// Enum for the two accepted output formats, '.fastq' and '.fastq.gz' -enum OutputFile { - Fastq { - read: bio::io::fastq::Writer, - }, - Gzip { - read: bio::io::fastq::Writer>, - }, -} - -// Implement write for OutputFile enum -impl OutputFile { - fn write(self, header: &str, desc: Option<&str>, s: bio::io::fastq::Record) -> OutputFile { - match self { - OutputFile::Fastq { mut read } => { - read.write(header, desc, s.seq(), s.qual()).unwrap(); - OutputFile::Fastq { read } - } - OutputFile::Gzip { mut read } => { - read.write(header, desc, s.seq(), s.qual()).unwrap(); - OutputFile::Gzip { read } - } - } - } -} - -// Read input file to Reader. Automatically scans if gzipped from file-format crate -fn read_fastq(path: &str) -> bio::io::fastq::Reader> { - let format = FileFormat::from_file(path).unwrap(); - if format == FileFormat::Gzip { - bio::io::fastq::Reader::new(ReadFile::Gzip( - std::fs::File::open(path) - .map(std::io::BufReader::new) - .map(flate2::bufread::MultiGzDecoder::new) - .unwrap(), - )) - } else { - // If not gzipped, read as plain fastq - bio::io::fastq::Reader::new(ReadFile::Fastq(std::fs::File::open(path).unwrap())) - } -} - -// Create output files -fn output_file(name: &str, gz: bool) -> OutputFile { - if gz { - OutputFile::Gzip { - read: std::fs::File::create(format!("{}.fastq.gz", name)) - .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) - .map(bio::io::fastq::Writer::new) - .unwrap(), - } - } else { - OutputFile::Fastq { - read: std::fs::File::create(format!("{}.fastq", name)) - .map(bio::io::fastq::Writer::new) - .unwrap(), - } - } -} +pub mod file_io; +mod umi_external; #[derive(clap::Parser)] #[clap( @@ 
-133,55 +52,9 @@ struct Opts { gzip: bool, } -// Writes record with properly inserted UMI to Output file -fn write_to_file( - input: bio::io::fastq::Record, - output: OutputFile, - umi: &[u8], - edit_nr: bool, -) -> OutputFile { - let s = input; - if edit_nr { - let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); - let mut string = String::from(s.desc().unwrap()); - string.replace_range(0..1, "2"); - let desc: Option<&str> = Some(&string); - output.write(header, desc, s) - } else { - let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); - output.write(header, s.desc(), s.clone()) - } -} - fn main() { // Parse commandline arguments let args = Opts::parse(); - // Enables editing id in output file 2 if --edit-nr flag was included - let mut edit_nr = false; - if args.edit_nr { - edit_nr = true; - } - - // Create fastq record iterators from input files - let r1 = read_fastq(&args.r1_in[0]).records(); - let r2 = read_fastq(&args.r2_in[0]).records(); - let ru = read_fastq(&args.ru_in[0]).records(); - - // Create write files. 
- let mut write_file_r1 = output_file(&format!("{}1", &args.prefix), args.gzip); - let mut write_file_r2 = output_file(&format!("{}2", &args.prefix), args.gzip); - - println!("Transfering UMIs to records..."); - - // Iterate over records in input files - for (r1_rec, ru_rec_res, r2_rec) in izip!(r1, ru, r2) { - let ru_rec = ru_rec_res.unwrap(); - // Write to Output file (never edit nr for R1) - write_file_r1 = write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec.seq(), false); - - let ru_rec2 = ru_rec.clone(); - // Write to Output file (edit nr for R2 if --edit-nr flag was included) - write_file_r2 = write_to_file(r2_rec.unwrap(), write_file_r2, ru_rec2.seq(), edit_nr); - } + umi_external::run(args); } diff --git a/src/umi_external.rs b/src/umi_external.rs new file mode 100644 index 0000000..b1902b9 --- /dev/null +++ b/src/umi_external.rs @@ -0,0 +1,34 @@ +use itertools::izip; + +use file_io; + +pub fn run(args: clap::Opts) { + // Enables editing id in output file 2 if --edit-nr flag was included + let mut edit_nr = false; + if args.edit_nr { + edit_nr = true; + } + + // Create fastq record iterators from input files + let r1 = file_io::read_fastq(&args.r1_in[0]).records(); + let r2 = file_io::read_fastq(&args.r2_in[0]).records(); + let ru = file_io::read_fastq(&args.ru_in[0]).records(); + + // Create write files. 
+ let mut write_file_r1 = file_io::output_file(&format!("{}1", &args.prefix), args.gzip); + let mut write_file_r2 = file_io::output_file(&format!("{}2", &args.prefix), args.gzip); + + println!("Transfering UMIs to records..."); + + // Iterate over records in input files + for (r1_rec, ru_rec_res, r2_rec) in izip!(r1, ru, r2) { + let ru_rec = ru_rec_res.unwrap(); + // Write to Output file (never edit nr for R1) + write_file_r1 = file_io::write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec.seq(), false); + + let ru_rec2 = ru_rec.clone(); + // Write to Output file (edit nr for R2 if --edit-nr flag was included) + write_file_r2 = + file_io::write_to_file(r2_rec.unwrap(), write_file_r2, ru_rec2.seq(), edit_nr); + } +} From c1f4dcb45d1f9cadba4caff499cabe743ea90e34 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 20 Apr 2023 09:25:09 +0200 Subject: [PATCH 13/72] Managed to use modules --- src/main.rs | 4 ++-- src/umi_external.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index b5022d3..f15935a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use clap::Parser; -pub mod file_io; +mod file_io; mod umi_external; #[derive(clap::Parser)] @@ -9,7 +9,7 @@ mod umi_external; author = "Written by Judit Hohenthal, Matthias Zepper, Johannes Alneberg", about = "A tool for transfering Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. 
\n\n" )] -struct Opts { +pub struct Opts { #[clap( long, default_value = "output", diff --git a/src/umi_external.rs b/src/umi_external.rs index b1902b9..5b9b80f 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -1,8 +1,8 @@ use itertools::izip; -use file_io; +use super::file_io; -pub fn run(args: clap::Opts) { +pub fn run(args: super::Opts) { // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { From d00f40fc4b1a7a626f0a9fd8054ddc48f631eed3 Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Thu, 20 Apr 2023 09:25:44 +0200 Subject: [PATCH 14/72] Added a gitignore --- .gitignore | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9b96c89 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +# We have so far commited Cargo.lock, so need decision if we want to remove it +# Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +# Test data +test_*.fastq.gz \ No newline at end of file From 723b08bef3ad1e2430c939170d08f0678c3c30ff Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 3 May 2023 15:40:22 +0200 Subject: [PATCH 15/72] Adding some basic error handling. 
--- Cargo.lock | 5 +++-- Cargo.toml | 1 + src/main.rs | 10 ++++++---- src/umi_external.rs | 26 +++++++++++++++++++------- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7f6ca32..298d3f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.58" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" [[package]] name = "approx" @@ -914,6 +914,7 @@ checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" name = "umi-transfer" version = "0.2.0" dependencies = [ + "anyhow", "bio", "clap", "file-format", diff --git a/Cargo.toml b/Cargo.toml index 2347d24..f8db807 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,3 +13,4 @@ indicatif = "0.17.0" flate2 = "1.0.24" itertools = "0.10.5" file-format = "0.7.0" +anyhow = "1.0.71" diff --git a/src/main.rs b/src/main.rs index f15935a..1033e06 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +use anyhow::{Context, Result}; use clap::Parser; mod file_io; @@ -7,7 +8,7 @@ mod umi_external; #[clap( version = "0.2.0", author = "Written by Judit Hohenthal, Matthias Zepper, Johannes Alneberg", - about = "A tool for transfering Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. \n\n" + about = "A tool for transferring Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. 
\n\n" )] pub struct Opts { #[clap( @@ -52,9 +53,10 @@ pub struct Opts { gzip: bool, } -fn main() { - // Parse commandline arguments +fn main() -> Result<()> { + // Parse command line arguments let args = Opts::parse(); - umi_external::run(args); + umi_external::run(args).context("Failed to include the UMIs")?; + Ok(()) } diff --git a/src/umi_external.rs b/src/umi_external.rs index 5b9b80f..b0263c1 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -1,8 +1,9 @@ +use anyhow::{Context, Result}; use itertools::izip; use super::file_io; -pub fn run(args: super::Opts) { +pub fn run(args: super::Opts) -> Result<()> { // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { @@ -18,17 +19,28 @@ pub fn run(args: super::Opts) { let mut write_file_r1 = file_io::output_file(&format!("{}1", &args.prefix), args.gzip); let mut write_file_r2 = file_io::output_file(&format!("{}2", &args.prefix), args.gzip); - println!("Transfering UMIs to records..."); + println!("Transferring UMIs to records..."); // Iterate over records in input files for (r1_rec, ru_rec_res, r2_rec) in izip!(r1, ru, r2) { - let ru_rec = ru_rec_res.unwrap(); + let ru_rec = ru_rec_res + .with_context(|| format!("Failed to read records from {}", &args.ru_in[0]))?; + // Write to Output file (never edit nr for R1) - write_file_r1 = file_io::write_to_file(r1_rec.unwrap(), write_file_r1, ru_rec.seq(), false); + write_file_r1 = file_io::write_to_file( + r1_rec.with_context(|| format!("Failed to read records from {}", &args.r1_in[0]))?, + write_file_r1, + &ru_rec.seq(), + false, + ); - let ru_rec2 = ru_rec.clone(); // Write to Output file (edit nr for R2 if --edit-nr flag was included) - write_file_r2 = - file_io::write_to_file(r2_rec.unwrap(), write_file_r2, ru_rec2.seq(), edit_nr); + write_file_r2 = file_io::write_to_file( + r2_rec.with_context(|| format!("Failed to read records from {}", &args.r2_in[0]))?, + write_file_r2, + &ru_rec.seq(), + edit_nr, + 
); } + Ok(()) } From b2c502bc62c1f9f113ab44e5d0988f9c05991fcf Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 3 May 2023 20:32:39 +0200 Subject: [PATCH 16/72] Implemented some basic error handling. --- src/main.rs | 1 + src/umi_errors.rs | 19 +++++++++++++++++++ src/umi_external.rs | 37 ++++++++++++++++++++----------------- 3 files changed, 40 insertions(+), 17 deletions(-) create mode 100644 src/umi_errors.rs diff --git a/src/main.rs b/src/main.rs index 1033e06..2a65b3c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ use anyhow::{Context, Result}; use clap::Parser; mod file_io; +mod umi_errors; mod umi_external; #[derive(clap::Parser)] diff --git a/src/umi_errors.rs b/src/umi_errors.rs new file mode 100644 index 0000000..54075a8 --- /dev/null +++ b/src/umi_errors.rs @@ -0,0 +1,19 @@ +#[derive(Debug)] +pub enum RuntimeErrors { + ReadIDMismatchError, + FileNotFoundError, + GeneralError, +} + +impl std::fmt::Display for RuntimeErrors { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::ReadIDMismatchError => write!( + f, + "IDs of UMI and read records mismatch. Please provide sorted files!" 
+ ), + Self::FileNotFoundError => write!(f, "Cannot read from specified path."), + Self::GeneralError => write!(f, "Encountered an error."), + } + } +} diff --git a/src/umi_external.rs b/src/umi_external.rs index b0263c1..063c5c9 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -1,7 +1,8 @@ -use anyhow::{Context, Result}; +use anyhow::{anyhow, Context, Result}; use itertools::izip; use super::file_io; +use crate::umi_errors::RuntimeErrors; pub fn run(args: super::Opts) -> Result<()> { // Enables editing id in output file 2 if --edit-nr flag was included @@ -22,25 +23,27 @@ pub fn run(args: super::Opts) -> Result<()> { println!("Transferring UMIs to records..."); // Iterate over records in input files - for (r1_rec, ru_rec_res, r2_rec) in izip!(r1, ru, r2) { + for (r1_rec_res, ru_rec_res, r2_rec_res) in izip!(r1, ru, r2) { + let r1_rec = r1_rec_res + .with_context(|| format!("Failed to read records from {}", &args.r1_in[0]))?; + let r2_rec = r2_rec_res + .with_context(|| format!("Failed to read records from {}", &args.r2_in[0]))?; let ru_rec = ru_rec_res .with_context(|| format!("Failed to read records from {}", &args.ru_in[0]))?; - // Write to Output file (never edit nr for R1) - write_file_r1 = file_io::write_to_file( - r1_rec.with_context(|| format!("Failed to read records from {}", &args.r1_in[0]))?, - write_file_r1, - &ru_rec.seq(), - false, - ); - - // Write to Output file (edit nr for R2 if --edit-nr flag was included) - write_file_r2 = file_io::write_to_file( - r2_rec.with_context(|| format!("Failed to read records from {}", &args.r2_in[0]))?, - write_file_r2, - &ru_rec.seq(), - edit_nr, - ); + if r1_rec.id().eq(ru_rec.id()) { + // Write to Output file (never edit nr for R1) + write_file_r1 = file_io::write_to_file(r1_rec, write_file_r1, &ru_rec.seq(), false); + } else { + return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); + } + + if r2_rec.id().eq(ru_rec.id()) { + // Write to Output file (edit nr for R2 if --edit-nr flag was included) + 
write_file_r2 = file_io::write_to_file(r2_rec, write_file_r2, &ru_rec.seq(), edit_nr); + } else { + return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); + } } Ok(()) } From aef2bacbb29f93340c5d5e9bbd9e791ade5ca4d4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 5 May 2023 20:15:20 +0200 Subject: [PATCH 17/72] Further refactor: Subcommand structure to allow for easy addition of further subcommands in later versions. --- src/auxiliary.rs | 11 +++++++ src/main.rs | 76 ++++++++++++++++++--------------------------- src/umi_external.rs | 47 +++++++++++++++++++++++++++- 3 files changed, 87 insertions(+), 47 deletions(-) create mode 100644 src/auxiliary.rs diff --git a/src/auxiliary.rs b/src/auxiliary.rs new file mode 100644 index 0000000..81d9099 --- /dev/null +++ b/src/auxiliary.rs @@ -0,0 +1,11 @@ +use std::time::Instant; + +pub fn timedrun(msg: &str, func: F) -> R +where + F: FnOnce() -> R, +{ + let start = Instant::now(); + let measure = func(); + println!("{msg} after {:.1} seconds", start.elapsed().as_secs_f32()); + measure +} diff --git a/src/main.rs b/src/main.rs index 2a65b3c..f03129e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,12 @@ +extern crate core; + use anyhow::{Context, Result}; use clap::Parser; +use crate::auxiliary::timedrun; +use crate::umi_external::OptsExternal; +///use crate::umi_internal::OptsInternal; +mod auxiliary; mod file_io; mod umi_errors; mod umi_external; @@ -11,53 +17,31 @@ mod umi_external; author = "Written by Judit Hohenthal, Matthias Zepper, Johannes Alneberg", about = "A tool for transferring Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. \n\n" )] -pub struct Opts { - #[clap( - long, - default_value = "output", - help = "Prefix for output files, omitted flag will result in default value. 
- \n " - )] - prefix: String, - #[clap( - long, - help = "Automatically change '3' into '2' in sequence header of output file from R3. - \n " - )] - edit_nr: bool, - #[clap( - long, - required = true, - help = "[REQUIRED] Input file 1 with reads. - \n " - )] - r1_in: Vec, - #[clap( - long, - required = true, - help = "[REQUIRED] Input file 2 with reads. - \n " - )] - r2_in: Vec, - #[clap( - long, - required = true, - help = "[REQUIRED] Input file with UMI. - \n" - )] - ru_in: Vec, - #[clap( - long, - help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). - \n " - )] - gzip: bool, + +pub struct Opt { + #[clap(subcommand)] + cmd: Subcommand, +} + +#[derive(Debug, Parser)] +enum Subcommand { + /// Integrate UMIs from a separate FastQ file. + External(OptsExternal), + // Extract UMIs from the reads themselves. + // Internal(OptsInternal), } -fn main() -> Result<()> { - // Parse command line arguments - let args = Opts::parse(); +fn main() { + let opt: Opt = Opt::parse(); + timedrun("umi-transfer finished ", || { + let res = match opt.cmd { + Subcommand::External(arg) => { + umi_external::run(arg).context("Failed to include the UMIs") + } //Subcommand::Internal(arg) => umi_internal::run(arg), + }; - umi_external::run(args).context("Failed to include the UMIs")?; - Ok(()) + if let Err(v) = res { + println!("{:?}", v) + } + }); } diff --git a/src/umi_external.rs b/src/umi_external.rs index 063c5c9..8a12ddd 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -1,10 +1,55 @@ use anyhow::{anyhow, Context, Result}; +use clap::Parser; use itertools::izip; +use std::path::PathBuf; use super::file_io; use crate::umi_errors::RuntimeErrors; +#[derive(Debug, Parser)] +pub struct OptsExternal { + #[clap( + long, + default_value = "output", + help = "Prefix for output files, omitted flag will result in default value. 
+ \n " + )] + prefix: String, + #[clap( + long, + help = "Automatically change '3' into '2' in sequence header of output file from R3. + \n " + )] + edit_nr: bool, + #[clap( + long, + required = true, + help = "[REQUIRED] Input file 1 with reads. + \n " + )] + r1_in: Vec, + #[clap( + long, + required = true, + help = "[REQUIRED] Input file 2 with reads. + \n " + )] + r2_in: Vec, + #[clap( + long, + required = true, + help = "[REQUIRED] Input file with UMI. + \n" + )] + ru_in: Vec, + #[clap( + long, + help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). + \n " + )] + gzip: bool, +} -pub fn run(args: super::Opts) -> Result<()> { +pub fn run(args: OptsExternal) -> Result<()> { // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { From c62c17a9b9ec4e54c1982b336d53c0640332ded8 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 5 May 2023 20:54:01 +0200 Subject: [PATCH 18/72] Implemented a simple counter for the records. --- src/file_io.rs | 3 ++- src/main.rs | 2 +- src/umi_external.rs | 59 +++++++++++++++++++++++++++++++-------------- 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 484835d..94470c2 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,4 +1,5 @@ use file_format::FileFormat; +use std::path::PathBuf; // Defining types for simplicity type File = std::fs::File; @@ -48,7 +49,7 @@ impl OutputFile { } // Read input file to Reader. 
Automatically scans if gzipped from file-format crate -pub fn read_fastq(path: &str) -> bio::io::fastq::Reader> { +pub fn read_fastq(path: &PathBuf) -> bio::io::fastq::Reader> { let format = FileFormat::from_file(path).unwrap(); if format == FileFormat::Gzip { bio::io::fastq::Reader::new(ReadFile::Gzip( diff --git a/src/main.rs b/src/main.rs index f03129e..3a2f539 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,7 +33,7 @@ enum Subcommand { fn main() { let opt: Opt = Opt::parse(); - timedrun("umi-transfer finished ", || { + timedrun("umi-transfer finished", || { let res = match opt.cmd { Subcommand::External(arg) => { umi_external::run(arg).context("Failed to include the UMIs") diff --git a/src/umi_external.rs b/src/umi_external.rs index 8a12ddd..e6ffc37 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -21,35 +21,39 @@ pub struct OptsExternal { )] edit_nr: bool, #[clap( - long, + short = '1', + long = "in1", required = true, help = "[REQUIRED] Input file 1 with reads. \n " )] - r1_in: Vec, + r1_in: PathBuf, #[clap( - long, + short = '2', + long = "in2", required = true, help = "[REQUIRED] Input file 2 with reads. \n " )] - r2_in: Vec, + r2_in: PathBuf, #[clap( - long, + short = 'u', + long = "umi", required = true, help = "[REQUIRED] Input file with UMI. \n" )] - ru_in: Vec, + ru_in: PathBuf, #[clap( - long, + short = 'z', + long = "gzip", help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). 
\n " )] gzip: bool, } -pub fn run(args: OptsExternal) -> Result<()> { +pub fn run(args: OptsExternal) -> Result { // Enables editing id in output file 2 if --edit-nr flag was included let mut edit_nr = false; if args.edit_nr { @@ -57,24 +61,42 @@ pub fn run(args: OptsExternal) -> Result<()> { } // Create fastq record iterators from input files - let r1 = file_io::read_fastq(&args.r1_in[0]).records(); - let r2 = file_io::read_fastq(&args.r2_in[0]).records(); - let ru = file_io::read_fastq(&args.ru_in[0]).records(); + let r1 = file_io::read_fastq(&args.r1_in).records(); + let r2 = file_io::read_fastq(&args.r2_in).records(); + let ru = file_io::read_fastq(&args.ru_in).records(); // Create write files. let mut write_file_r1 = file_io::output_file(&format!("{}1", &args.prefix), args.gzip); let mut write_file_r2 = file_io::output_file(&format!("{}2", &args.prefix), args.gzip); + // Record counter + let mut counter: i32 = 0; + println!("Transferring UMIs to records..."); // Iterate over records in input files for (r1_rec_res, ru_rec_res, r2_rec_res) in izip!(r1, ru, r2) { - let r1_rec = r1_rec_res - .with_context(|| format!("Failed to read records from {}", &args.r1_in[0]))?; - let r2_rec = r2_rec_res - .with_context(|| format!("Failed to read records from {}", &args.r2_in[0]))?; - let ru_rec = ru_rec_res - .with_context(|| format!("Failed to read records from {}", &args.ru_in[0]))?; + let r1_rec = r1_rec_res.with_context(|| { + format!( + "Failed to read records from {}", + &args.r1_in.to_string_lossy() + ) + })?; + let r2_rec = r2_rec_res.with_context(|| { + format!( + "Failed to read records from {}", + &args.r2_in.to_string_lossy() + ) + })?; + let ru_rec = ru_rec_res.with_context(|| { + format!( + "Failed to read records from {}", + &args.ru_in.to_string_lossy() + ) + })?; + + // Step counter + counter += 1; if r1_rec.id().eq(ru_rec.id()) { // Write to Output file (never edit nr for R1) @@ -90,5 +112,6 @@ pub fn run(args: OptsExternal) -> Result<()> { return 
Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); } } - Ok(()) + println!("Processed {:?} records", counter); + Ok(counter) } From 029c087282443985028728dc8c3b151d2452f51d Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 8 May 2023 15:08:52 +0200 Subject: [PATCH 19/72] Switching the CLI arguments from strings to Option and implemented an output overwrite check and prompt. --- Cargo.lock | 267 +++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 1 + src/auxiliary.rs | 23 +++- src/file_io.rs | 10 +- src/umi_errors.rs | 2 + src/umi_external.rs | 58 +++++++--- 6 files changed, 335 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 298d3f7..b2a29bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,7 +38,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -156,6 +156,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + [[package]] name = "cfg-if" version = "1.0.0" @@ -263,6 +269,18 @@ dependencies = [ "syn", ] +[[package]] +name = "dialoguer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59c6f2989294b9a498d3ad5491a79c6deb604617378e1cdc4bfc1c1361fe2f87" +dependencies = [ + "console", + "shell-words", + "tempfile", + "zeroize", +] + [[package]] name = "either" version = "1.7.0" @@ -295,6 +313,36 @@ dependencies = [ "syn", ] +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys 0.48.0", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "feature-probe" version = "0.1.1" @@ -385,6 +433,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "indexmap" version = "1.9.1" @@ -406,6 +460,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +dependencies = [ + "hermit-abi 0.3.1", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "itertools" version = "0.10.5" @@ -438,9 +512,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.143" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "edc207893e85c5d6be840e969b496b53d94cec8be2d501b214f50daa97fa8024" [[package]] name = 
"libm" @@ -448,6 +522,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db" +[[package]] +name = "linux-raw-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" + [[package]] name = "matrixmultiply" version = "0.3.2" @@ -709,6 +789,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + [[package]] name = "regex" version = "1.6.0" @@ -741,6 +830,20 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.37.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + [[package]] name = "rustversion" version = "1.0.8" @@ -779,6 +882,12 @@ dependencies = [ "syn", ] +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" + [[package]] name = "simba" version = "0.5.1" @@ -853,6 +962,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "tempfile" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.45.0", +] + [[package]] name = 
"termcolor" version = "1.1.3" @@ -917,6 +1039,7 @@ dependencies = [ "anyhow", "bio", "clap", + "dialoguer", "file-format", "flate2", "indicatif", @@ -993,3 +1116,141 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", +] + +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + "windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + 
+[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "zeroize" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" diff --git a/Cargo.toml b/Cargo.toml index f8db807..b8f599a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,3 +14,4 @@ flate2 = "1.0.24" itertools = "0.10.5" file-format = "0.7.0" anyhow = "1.0.71" +dialoguer = "0.10.4" diff --git a/src/auxiliary.rs b/src/auxiliary.rs index 81d9099..ef74693 100644 --- a/src/auxiliary.rs +++ b/src/auxiliary.rs @@ -1,4 +1,7 @@ -use std::time::Instant; +use super::umi_errors::RuntimeErrors; +use anyhow::{anyhow, Result}; +use dialoguer::Confirm; +use std::{fs, path::PathBuf, time::Instant}; pub fn timedrun(msg: &str, func: F) -> R where @@ -9,3 +12,21 @@ where println!("{msg} after {:.1} seconds", start.elapsed().as_secs_f32()); measure } + +pub fn check_outputpath(path: PathBuf) -> Result { + let exists = fs::metadata(&path).is_ok(); + + if exists { + if Confirm::new() + .with_prompt(format!("{} exists. Overwrite?", path.display())) + .interact()? 
+ { + println!("File will be overwritten."); + return Ok(path); + } else { + return Err(anyhow!(RuntimeErrors::FileExistsError)); + } + } else { + return Ok(path); + } +} diff --git a/src/file_io.rs b/src/file_io.rs index 94470c2..943b9f8 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -65,17 +65,19 @@ pub fn read_fastq(path: &PathBuf) -> bio::io::fastq::Reader OutputFile { - if gz { +pub fn output_file(mut name: PathBuf, gz: bool) -> OutputFile { + if gz | name.ends_with(".gz") { + name.set_extension("fastq.gz"); OutputFile::Gzip { - read: std::fs::File::create(format!("{}.fastq.gz", name)) + read: std::fs::File::create(name.as_path()) .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) .map(bio::io::fastq::Writer::new) .unwrap(), } } else { + name.set_extension("fastq"); OutputFile::Fastq { - read: std::fs::File::create(format!("{}.fastq", name)) + read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) .unwrap(), } diff --git a/src/umi_errors.rs b/src/umi_errors.rs index 54075a8..0d24054 100644 --- a/src/umi_errors.rs +++ b/src/umi_errors.rs @@ -2,6 +2,7 @@ pub enum RuntimeErrors { ReadIDMismatchError, FileNotFoundError, + FileExistsError, GeneralError, } @@ -13,6 +14,7 @@ impl std::fmt::Display for RuntimeErrors { "IDs of UMI and read records mismatch. Please provide sorted files!" 
), Self::FileNotFoundError => write!(f, "Cannot read from specified path."), + Self::FileExistsError => write!(f, "Output file exists, but must not be overwritten."), Self::GeneralError => write!(f, "Encountered an error."), } } diff --git a/src/umi_external.rs b/src/umi_external.rs index e6ffc37..5374d7b 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -4,32 +4,31 @@ use itertools::izip; use std::path::PathBuf; use super::file_io; -use crate::umi_errors::RuntimeErrors; +use crate::{auxiliary::check_outputpath, umi_errors::RuntimeErrors}; #[derive(Debug, Parser)] pub struct OptsExternal { #[clap( - long, - default_value = "output", - help = "Prefix for output files, omitted flag will result in default value. + short = 'f', + long = "fix_numbers", + help = "Automatically change '3' into '2' in sequence header of output file from R3. \n " )] - prefix: String, + edit_nr: bool, #[clap( - long, - help = "Automatically change '3' into '2' in sequence header of output file from R3. + short = 'z', + long = "gzip", + help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). \n " )] - edit_nr: bool, + gzip: bool, #[clap( - short = '1', - long = "in1", + long = "in", required = true, help = "[REQUIRED] Input file 1 with reads. \n " )] r1_in: PathBuf, #[clap( - short = '2', long = "in2", required = true, help = "[REQUIRED] Input file 2 with reads. @@ -45,12 +44,24 @@ pub struct OptsExternal { )] ru_in: PathBuf, #[clap( - short = 'z', - long = "gzip", - help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). + long, + default_value = "output", + help = "Prefix for output files, omitted flag will result in default value. \n " )] - gzip: bool, + prefix: String, + #[clap( + long = "out", + help = "Path to FastQ output file for R1. + \n " + )] + r1_out: Option, + #[clap( + long = "out2", + help = "Path to FastQ output file for R2. 
+ \n " + )] + r2_out: Option, } pub fn run(args: OptsExternal) -> Result { @@ -65,9 +76,20 @@ pub fn run(args: OptsExternal) -> Result { let r2 = file_io::read_fastq(&args.r2_in).records(); let ru = file_io::read_fastq(&args.ru_in).records(); - // Create write files. - let mut write_file_r1 = file_io::output_file(&format!("{}1", &args.prefix), args.gzip); - let mut write_file_r2 = file_io::output_file(&format!("{}2", &args.prefix), args.gzip); + // If output paths have been specified, check if the are ok to use or use prefix constructors. + let output1: PathBuf; + let output2: PathBuf; + + if args.r1_out.is_some() && args.r2_out.is_some() { + output1 = check_outputpath(args.r1_out.unwrap())?; + output2 = check_outputpath(args.r2_out.unwrap())?; + } else { + output1 = check_outputpath(PathBuf::from(format!("{}1", &args.prefix)))?; + output2 = check_outputpath(PathBuf::from(format!("{}2", &args.prefix)))?; + } + + let mut write_file_r1 = file_io::output_file(output1, args.gzip); + let mut write_file_r2 = file_io::output_file(output2, args.gzip); // Record counter let mut counter: i32 = 0; From e1c74020eccb859a8fde8194f5d3b65194be994e Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 8 May 2023 15:38:01 +0200 Subject: [PATCH 20/72] Cleaner code for output checks and suffix updates. 
--- src/auxiliary.rs | 23 +---------------------- src/file_io.rs | 40 +++++++++++++++++++++++++++++++++++----- src/umi_external.rs | 24 ++++++++++++------------ 3 files changed, 48 insertions(+), 39 deletions(-) diff --git a/src/auxiliary.rs b/src/auxiliary.rs index ef74693..81d9099 100644 --- a/src/auxiliary.rs +++ b/src/auxiliary.rs @@ -1,7 +1,4 @@ -use super::umi_errors::RuntimeErrors; -use anyhow::{anyhow, Result}; -use dialoguer::Confirm; -use std::{fs, path::PathBuf, time::Instant}; +use std::time::Instant; pub fn timedrun(msg: &str, func: F) -> R where @@ -12,21 +9,3 @@ where println!("{msg} after {:.1} seconds", start.elapsed().as_secs_f32()); measure } - -pub fn check_outputpath(path: PathBuf) -> Result { - let exists = fs::metadata(&path).is_ok(); - - if exists { - if Confirm::new() - .with_prompt(format!("{} exists. Overwrite?", path.display())) - .interact()? - { - println!("File will be overwritten."); - return Ok(path); - } else { - return Err(anyhow!(RuntimeErrors::FileExistsError)); - } - } else { - return Ok(path); - } -} diff --git a/src/file_io.rs b/src/file_io.rs index 943b9f8..eb260ca 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,5 +1,9 @@ +use anyhow::{anyhow, Result}; +use dialoguer::Confirm; use file_format::FileFormat; -use std::path::PathBuf; +use std::{fs, path::PathBuf}; + +use super::umi_errors::RuntimeErrors; // Defining types for simplicity type File = std::fs::File; @@ -65,9 +69,8 @@ pub fn read_fastq(path: &PathBuf) -> bio::io::fastq::Reader OutputFile { - if gz | name.ends_with(".gz") { - name.set_extension("fastq.gz"); +pub fn output_file(name: PathBuf) -> OutputFile { + if name.ends_with(".gz") { OutputFile::Gzip { read: std::fs::File::create(name.as_path()) .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) @@ -75,7 +78,6 @@ pub fn output_file(mut name: PathBuf, gz: bool) -> OutputFile { .unwrap(), } } else { - name.set_extension("fastq"); OutputFile::Fastq { read: 
std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) @@ -103,3 +105,31 @@ pub fn write_to_file( output.write(header, s.desc(), s.clone()) } } + +// Checks whether an output path exists. +pub fn check_outputpath(mut path: PathBuf, compress: &bool) -> Result { + // handle the compression and adapt file extension. + if compress | path.ends_with(".gz") { + path.set_extension("fastq.gz"); + } else { + path.set_extension("fastq"); + } + + // check if the path already exists + let exists = fs::metadata(&path).is_ok(); + + // return the path of it is ok to write, otherwise an error. + if exists { + if Confirm::new() + .with_prompt(format!("{} exists. Overwrite?", path.display())) + .interact()? + { + println!("File will be overwritten."); + return Ok(path); + } else { + return Err(anyhow!(RuntimeErrors::FileExistsError)); + } + } else { + return Ok(path); + } +} diff --git a/src/umi_external.rs b/src/umi_external.rs index 5374d7b..e5bb0be 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -4,7 +4,7 @@ use itertools::izip; use std::path::PathBuf; use super::file_io; -use crate::{auxiliary::check_outputpath, umi_errors::RuntimeErrors}; +use crate::{file_io::check_outputpath, umi_errors::RuntimeErrors}; #[derive(Debug, Parser)] pub struct OptsExternal { #[clap( @@ -77,19 +77,19 @@ pub fn run(args: OptsExternal) -> Result { let ru = file_io::read_fastq(&args.ru_in).records(); // If output paths have been specified, check if the are ok to use or use prefix constructors. 
- let output1: PathBuf; - let output2: PathBuf; + let mut output1: PathBuf = args + .r1_out + .unwrap_or(PathBuf::from(format!("{}1", &args.prefix))); + let mut output2: PathBuf = args + .r2_out + .unwrap_or(PathBuf::from(format!("{}2", &args.prefix))); - if args.r1_out.is_some() && args.r2_out.is_some() { - output1 = check_outputpath(args.r1_out.unwrap())?; - output2 = check_outputpath(args.r2_out.unwrap())?; - } else { - output1 = check_outputpath(PathBuf::from(format!("{}1", &args.prefix)))?; - output2 = check_outputpath(PathBuf::from(format!("{}2", &args.prefix)))?; - } + // modify if output path according to compression settings and check if exists. + output1 = check_outputpath(output1, &args.gzip)?; + output2 = check_outputpath(output2, &args.gzip)?; - let mut write_file_r1 = file_io::output_file(output1, args.gzip); - let mut write_file_r2 = file_io::output_file(output2, args.gzip); + let mut write_file_r1 = file_io::output_file(output1); + let mut write_file_r2 = file_io::output_file(output2); // Record counter let mut counter: i32 = 0; From 18323d34f11d18ace72f05380a9f790d3f54451e Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 8 May 2023 17:48:17 +0200 Subject: [PATCH 21/72] file_io::append_to_path() must work without owning the provided PathBuf. Had do clone twice :-( --- src/file_io.rs | 9 +++++++++ src/umi_external.rs | 11 ++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index eb260ca..0bc3b0e 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -133,3 +133,12 @@ pub fn check_outputpath(mut path: PathBuf, compress: &bool) -> Result { return Ok(path); } } + +// probably, there is a better way to do this than with two copies ?!? 
+pub fn append_to_path(path: &PathBuf, string: &str) -> PathBuf { + let mut stem = path.to_owned(); + stem.set_extension(""); + let mut p_osstr = stem.as_os_str().to_owned(); + p_osstr.push(string); + p_osstr.into() +} diff --git a/src/umi_external.rs b/src/umi_external.rs index e5bb0be..5da48d8 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -43,13 +43,6 @@ pub struct OptsExternal { \n" )] ru_in: PathBuf, - #[clap( - long, - default_value = "output", - help = "Prefix for output files, omitted flag will result in default value. - \n " - )] - prefix: String, #[clap( long = "out", help = "Path to FastQ output file for R1. @@ -79,10 +72,10 @@ pub fn run(args: OptsExternal) -> Result { // If output paths have been specified, check if the are ok to use or use prefix constructors. let mut output1: PathBuf = args .r1_out - .unwrap_or(PathBuf::from(format!("{}1", &args.prefix))); + .unwrap_or(file_io::append_to_path(&args.r1_in, "_with_UMIs")); let mut output2: PathBuf = args .r2_out - .unwrap_or(PathBuf::from(format!("{}2", &args.prefix))); + .unwrap_or(file_io::append_to_path(&args.r2_in, "_with_UMIs")); // modify if output path according to compression settings and check if exists. output1 = check_outputpath(output1, &args.gzip)?; From 01a2db568ee72dd537f8606029f0326d3463f9a6 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 8 May 2023 22:24:05 +0200 Subject: [PATCH 22/72] Finished autogeneration of the file name extension, used a Regex to modify the input file names if no output file names were given. 
--- Cargo.lock | 13 ++++++----- Cargo.toml | 1 + src/file_io.rs | 51 +++++++++++++++++++++++----------------- src/main.rs | 2 +- src/umi_errors.rs | 8 ++++--- src/umi_external.rs | 57 ++++++++++++++++++++++++++------------------- 6 files changed, 76 insertions(+), 56 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2a29bf..c00d1f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" -version = "0.7.18" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -800,9 +800,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.6.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" dependencies = [ "aho-corasick", "memchr", @@ -817,9 +817,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" [[package]] name = "rustc_version" @@ -1045,6 +1045,7 @@ dependencies = [ "indicatif", "itertools", "lazy_static", + "regex", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b8f599a..dc3cc71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ itertools = "0.10.5" file-format = "0.7.0" anyhow = "1.0.71" dialoguer = "0.10.4" +regex = "1.8.1" diff --git a/src/file_io.rs b/src/file_io.rs index 0bc3b0e..7e71f07 100644 
--- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,10 +1,10 @@ -use anyhow::{anyhow, Result}; +use super::umi_errors::RuntimeErrors; +use anyhow::{anyhow, Context, Result}; use dialoguer::Confirm; use file_format::FileFormat; +use regex::Regex; use std::{fs, path::PathBuf}; -use super::umi_errors::RuntimeErrors; - // Defining types for simplicity type File = std::fs::File; type Fastq = std::io::BufReader; @@ -52,19 +52,26 @@ impl OutputFile { } } -// Read input file to Reader. Automatically scans if gzipped from file-format crate -pub fn read_fastq(path: &PathBuf) -> bio::io::fastq::Reader> { - let format = FileFormat::from_file(path).unwrap(); +// Read input file to Reader. Automatically scans if input is compressed with file-format crate. +pub fn read_fastq(path: &PathBuf) -> Result>> { + if fs::metadata(&path).is_err() { + return Err(anyhow!(RuntimeErrors::FileNotFoundError)); + } + + let format = FileFormat::from_file(path).context("Failed to determine file format")?; if format == FileFormat::Gzip { - bio::io::fastq::Reader::new(ReadFile::Gzip( + Ok(bio::io::fastq::Reader::new(ReadFile::Gzip( std::fs::File::open(path) .map(std::io::BufReader::new) .map(flate2::bufread::MultiGzDecoder::new) - .unwrap(), - )) + .with_context(|| format!("Failed to open file: {:?}", path))?, + ))) } else { // If not gzipped, read as plain fastq - bio::io::fastq::Reader::new(ReadFile::Fastq(std::fs::File::open(path).unwrap())) + Ok(bio::io::fastq::Reader::new(ReadFile::Fastq( + std::fs::File::open(path) + .with_context(|| format!("Failed to open file: {:?}", path))?, + ))) } } @@ -108,11 +115,13 @@ pub fn write_to_file( // Checks whether an output path exists. pub fn check_outputpath(mut path: PathBuf, compress: &bool) -> Result { - // handle the compression and adapt file extension. - if compress | path.ends_with(".gz") { - path.set_extension("fastq.gz"); - } else { - path.set_extension("fastq"); + // handle the compression and adapt file extension if necessary. 
+ if compress & !path.ends_with(".gz") { + if let Some(extension) = path.extension() { + let mut new_extension = extension.to_str().unwrap_or("").to_owned(); + new_extension.push_str(".gz"); + path.set_extension(new_extension); + } } // check if the path already exists @@ -134,11 +143,9 @@ pub fn check_outputpath(mut path: PathBuf, compress: &bool) -> Result { } } -// probably, there is a better way to do this than with two copies ?!? -pub fn append_to_path(path: &PathBuf, string: &str) -> PathBuf { - let mut stem = path.to_owned(); - stem.set_extension(""); - let mut p_osstr = stem.as_os_str().to_owned(); - p_osstr.push(string); - p_osstr.into() +pub fn append_umi_to_path(path: &PathBuf) -> PathBuf { + let path_str = path.as_os_str().clone().to_string_lossy(); + let re = Regex::new(r"^(?P\.*[^\.]+)\.(?P.*)$").unwrap(); + let new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); + PathBuf::from(new_path_str.to_string()) } diff --git a/src/main.rs b/src/main.rs index 3a2f539..5a00c9e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ extern crate core; -use anyhow::{Context, Result}; +use anyhow::{Context}; use clap::Parser; use crate::auxiliary::timedrun; diff --git a/src/umi_errors.rs b/src/umi_errors.rs index 0d24054..90bfe15 100644 --- a/src/umi_errors.rs +++ b/src/umi_errors.rs @@ -3,7 +3,7 @@ pub enum RuntimeErrors { ReadIDMismatchError, FileNotFoundError, FileExistsError, - GeneralError, + //GeneralError, } impl std::fmt::Display for RuntimeErrors { @@ -13,9 +13,11 @@ impl std::fmt::Display for RuntimeErrors { f, "IDs of UMI and read records mismatch. Please provide sorted files!" 
), - Self::FileNotFoundError => write!(f, "Cannot read from specified path."), + Self::FileNotFoundError => { + write!(f, "Specified file does not exist or is not readable!") + } Self::FileExistsError => write!(f, "Output file exists, but must not be overwritten."), - Self::GeneralError => write!(f, "Encountered an error."), + //Self::GeneralError => write!(f, "Encountered an error."), } } } diff --git a/src/umi_external.rs b/src/umi_external.rs index 5da48d8..c144b5c 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -64,23 +64,47 @@ pub fn run(args: OptsExternal) -> Result { edit_nr = true; } - // Create fastq record iterators from input files - let r1 = file_io::read_fastq(&args.r1_in).records(); - let r2 = file_io::read_fastq(&args.r2_in).records(); - let ru = file_io::read_fastq(&args.ru_in).records(); + // Read FastQ records from input files + let r1 = file_io::read_fastq(&args.r1_in) + .with_context(|| { + format!( + "Failed to read records from {}", + &args.r1_in.to_string_lossy() + ) + })? + .records(); + let r2 = file_io::read_fastq(&args.r2_in) + .with_context(|| { + format!( + "Failed to read records from {}", + &args.r2_in.to_string_lossy() + ) + })? + .records(); + let ru = file_io::read_fastq(&args.ru_in) + .with_context(|| { + format!( + "Failed to read records from {}", + &args.ru_in.to_string_lossy() + ) + })? + .records(); // If output paths have been specified, check if the are ok to use or use prefix constructors. let mut output1: PathBuf = args .r1_out - .unwrap_or(file_io::append_to_path(&args.r1_in, "_with_UMIs")); + .unwrap_or(file_io::append_umi_to_path(&args.r1_in)); let mut output2: PathBuf = args .r2_out - .unwrap_or(file_io::append_to_path(&args.r2_in, "_with_UMIs")); + .unwrap_or(file_io::append_umi_to_path(&args.r2_in)); // modify if output path according to compression settings and check if exists. 
output1 = check_outputpath(output1, &args.gzip)?; output2 = check_outputpath(output2, &args.gzip)?; + println!("Output 1 will be saved to: {}", output1.to_string_lossy()); + println!("Output 2 will be saved to: {}", output2.to_string_lossy()); + let mut write_file_r1 = file_io::output_file(output1); let mut write_file_r2 = file_io::output_file(output2); @@ -91,24 +115,9 @@ pub fn run(args: OptsExternal) -> Result { // Iterate over records in input files for (r1_rec_res, ru_rec_res, r2_rec_res) in izip!(r1, ru, r2) { - let r1_rec = r1_rec_res.with_context(|| { - format!( - "Failed to read records from {}", - &args.r1_in.to_string_lossy() - ) - })?; - let r2_rec = r2_rec_res.with_context(|| { - format!( - "Failed to read records from {}", - &args.r2_in.to_string_lossy() - ) - })?; - let ru_rec = ru_rec_res.with_context(|| { - format!( - "Failed to read records from {}", - &args.ru_in.to_string_lossy() - ) - })?; + let r1_rec = r1_rec_res?; + let r2_rec = r2_rec_res?; + let ru_rec = ru_rec_res?; // Step counter counter += 1; From 71ec566a5709d759fad98f6a19766320b4322eaf Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 May 2023 11:51:12 +0200 Subject: [PATCH 23/72] Implemented fixing the read numbers for both reads and using a custom delimiter. 
--- Cargo.lock | 259 +++++++++++++++++++++++--------------------- README.md | 106 +++++++++++------- src/file_io.rs | 46 +++++--- src/main.rs | 6 +- src/umi_external.rs | 46 ++++++-- 5 files changed, 270 insertions(+), 193 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c00d1f4..ecc4bbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,7 +97,7 @@ dependencies = [ "derive-new", "lazy_static", "regex", - "strum_macros 0.24.2", + "strum_macros 0.24.3", "thiserror", ] @@ -122,18 +122,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - [[package]] name = "bv" version = "0.11.1" @@ -170,9 +158,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.2.17" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29e724a68d9319343bb3328c9cc2dfde263f4b3142ee1059a9980580171c954b" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "atty", "bitflags", @@ -187,15 +175,15 @@ dependencies = [ [[package]] name = "clap_derive" -version = "3.2.17" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13547f7012c01ab4a0e8f8967730ada8f9fdf419e8b6c792788f39cf4e46eefa" +checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -209,16 +197,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.1" +version = "0.15.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "89eab4d20ce20cea182308bca13088fecea9c05f6776cf287205d41a0ed3c847" +checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" dependencies = [ "encode_unicode", + "lazy_static", "libc", - "once_cell", - "terminal_size", "unicode-width", - "winapi", + "windows-sys 0.42.0", ] [[package]] @@ -232,11 +219,10 @@ dependencies = [ [[package]] name = "csv" -version = "1.1.6" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" dependencies = [ - "bstr", "csv-core", "itoa", "ryu", @@ -266,7 +252,7 @@ checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -283,9 +269,9 @@ dependencies = [ [[package]] name = "either" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "encode_unicode" @@ -310,7 +296,7 @@ checksum = "84278eae0af6e34ff6c1db44c11634a694aafac559ff3080e4db4e4ac35907aa" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -363,9 +349,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] name = "flate2" -version = "1.0.24" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f82b0f4c27ad9f8bfd1f3208d882da2b09c301bc1c828fd3a00d0216d2fbbff6" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", "miniz_oxide", @@ -382,9 +368,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.7" 
+version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -400,7 +386,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -420,9 +406,9 @@ dependencies = [ [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" @@ -441,9 +427,9 @@ checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" [[package]] name = "indexmap" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", @@ -451,12 +437,13 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.0" +version = "0.17.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc42b206e70d86ec03285b123e65a5458c92027d1fb2ae3555878b8113b3ddf" +checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" dependencies = [ "console", "number_prefix", + "portable-atomic 0.3.20", "unicode-width", ] @@ -500,9 +487,9 @@ dependencies = [ [[package]] name = "itoa" -version = "0.4.8" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "lazy_static" @@ -512,15 +499,15 @@ checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.143" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc207893e85c5d6be840e969b496b53d94cec8be2d501b214f50daa97fa8024" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libm" -version = "0.2.2" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db" +checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] name = "linux-raw-sys" @@ -530,10 +517,11 @@ checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" [[package]] name = "matrixmultiply" -version = "0.3.2" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84" +checksum = "090126dc04f95dc0d1c1c91f61bdd474b3930ca064c1edc8a849da2c6cbe1e77" dependencies = [ + "autocfg", "rawpointer", ] @@ -545,9 +533,9 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "miniz_oxide" -version = "0.5.3" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f5c75688da582b8ffc1f1799e9db273f32133c49e048f614d22ec3256773ccc" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" dependencies = [ "adler", ] @@ -587,14 +575,14 @@ checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "ndarray" -version = "0.15.4" +version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec23e6762830658d2b3d385a75aa212af2f67a4586d4442907144f3bb6a1ca8" +checksum = 
"adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" dependencies = [ "matrixmultiply", "num-complex", @@ -614,9 +602,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" +checksum = "02e0d21255c828d6f128a1e41534206671e8c3ea0c62f32291e808dc82cff17d" dependencies = [ "num-traits", ] @@ -660,9 +648,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.13.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "ordered-float" @@ -675,31 +663,46 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.2.0" +version = "6.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "648001efe5d5c0102d8cea768e348da85d90af8ba91f0bea908f157951493cd4" +checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" [[package]] name = "paste" -version = "1.0.7" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" +checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", ] +[[package]] +name = "portable-atomic" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "e30165d31df606f5726b090ec7592c308a0eaf61721ff64c9a3018e344a8753e" +dependencies = [ + "portable-atomic 1.3.1", +] + +[[package]] +name = "portable-atomic" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bbda379e6e462c97ea6afe9f6233619b202bbc4968d7caa6917788d2070a044" + [[package]] name = "ppv-lite86" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro-error" @@ -710,7 +713,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "version_check", ] @@ -727,18 +730,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.42" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c278e965f1d8cf32d6e0e96de3d3e79712178ae67986d9cf9151f51e95aac89b" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.20" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" dependencies = [ "proc-macro2", ] @@ -766,9 +769,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] @@ -809,12 +812,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "regex-automata" -version = "0.1.10" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - [[package]] name = "regex-syntax" version = "0.7.1" @@ -846,15 +843,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.8" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24c8ad4f0c00e1eb5bc7614d236a7f1300e3dbd76b68cac8e06fb00b015ad8d8" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "ryu" -version = "1.0.10" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "semver" @@ -864,22 +861,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.140" +version = "1.0.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc855a42c7967b7c369eb5860f7164ef1f6f81c20c7cc1141f2a604e18723b03" +checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.140" +version = "1.0.162" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f2122636b9fe3b81f1cb25099fcf2d3f542cdb1d45940d56c713158884a05da" +checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -935,27 +932,38 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.109", ] [[package]] name = "strum_macros" -version = "0.24.2" +version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4faebde00e8ff94316c01800f9054fd2ba77d30d9e922541913051d1d978918b" +checksum = 
"1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", ] [[package]] name = "syn" -version = "1.0.98" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ "proc-macro2", "quote", @@ -977,47 +985,37 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" dependencies = [ "winapi-util", ] -[[package]] -name = "terminal_size" -version = "0.1.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "textwrap" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.31" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ 
"thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.31" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -1028,9 +1026,9 @@ checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" [[package]] name = "typenum" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "umi-transfer" @@ -1050,21 +1048,21 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.2" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-segmentation" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" [[package]] name = "unicode-width" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" [[package]] name = "vec_map" @@ -1118,6 +1116,21 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = 
"windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + [[package]] name = "windows-sys" version = "0.45.0" diff --git a/README.md b/README.md index 1a86226..d8232b0 100644 --- a/README.md +++ b/README.md @@ -1,84 +1,95 @@ # umi-transfer -A tool for transfering Unique Molecular Identifiers (UMIs). -The UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the -header of the first two fastq files. +A tool for transferring Unique Molecular Identifiers (UMIs) provided as separate FastQ file to the header of records in paired FastQ files. + ## Background -Common demultiplexing softwares return a separate fastq file, usually named `R2`, containing UMIs. -However, common analysis tools does not allow for this and instead requires the UMI to be contained within the header of the two reads in the pair. -This tools performs this transform in an efficient manner and can also conveniently rename the oddly named read-`3` to read-`2` which is probably more widely recognized. + +To increase the accuracy of quantitative DNA sequencing experiments, Unique Molecular Identifiers may be used. UMIs are short sequences used to uniquely tag each molecule in a sample library and facilitate the accurate identification of read duplicates. They must be added during library preparation and prior to sequencing, therefore require appropriate arrangements with your sequencing provider. + +Most tools capable of taking UMIs into consideration during an analysis workflow, expect the respective UMI sequence to be embedded into the read's ID. 
Please consult your tools' manuals regarding the exact specification. + +For some library preparation kits and sequencing adapters, the UMI sequence needs to be read together with the index from the antisense strand and thus will be output as a separate FastQ file during demultiplexing. + +This tool can integrate those separate UMIs into the headers in an efficient manner and can also correct divergent read numbers back to the canonical `1` and `2`. ## Installation ### Compile from source + Given that you have [rust installed](https://www.rust-lang.org/tools/install) on your computer, download this repo and run + ```shell cargo build --release ``` -That should create an executable `target/release/umi-transfer` that can be placed anywhere in your `$PATH` or be executed directly by specifying its' path: + +That should create an executable `target/release/umi-transfer` that can be placed anywhere in your `$PATH` or be executed directly by specifying its path: ```shell ./target/release/umi-transfer --version umi-transfer 0.2.0 ``` + ## Usage ->### Performance Note: +>### Performance Note +> >The decompression and compression used within umi-transfer is single-threaded, so to get the most reads per minute performance, see the [high performance guide](#high-performance-guide) -The tool requires three fastq files and additionally accepts flags to adjust the behaviour as can be seen from the help message: +The tool requires three FastQ files as input. You can manually specify the names and location of the output files with `--out` and `--out2` or the tool will append a `with_UMI` suffix to your input file names as output. It additionally lets you choose a custom UMI delimiter with `--delim` and set the flags `-f`, `-c` and `-z`. `-z` compresses the output and `-c` ensures `1` and `2` as read numbers in the output. `-f` / `--force` will overwrite existing output files without prompting the user.
```raw -$ umi-transfer --help -umi-transfer 0.2.0 -Judit Hohenthal, Matthias Zepper, Johannes Alneberg -A tool for transfering Unique Molecular Identifiers (UMIs). - -The UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the -header of the first two fastq files. - +$ umi-transfer external --help + umi-transfer-external +Integrate UMIs from a separate FastQ file USAGE: - umi-transfer [OPTIONS] --r1-in --r2-in --ru-in + umi-transfer external [OPTIONS] --in --in2 --umi OPTIONS: - --edit-nr Automatically change '3' into '2' in sequence header of output file - from R3. - - --gzip Compress output files with gzip. By default turned off to encourage use - of external compression (see Readme). - + -c, --correct_numbers Ensure read numbers 1 and 2 in sequence header of output files. + + -d, --delim Delimiter to use when joining the UMIs to the read name. Defaults to `:`. + + -f, --force Overwrite existing output files without further warnings or prompts. + -h, --help Print help information - --prefix Prefix for output files, omitted flag will result in default value. - - [default: output] - --r1-in [REQUIRED] Input file 1 with reads. - - - --r2-in [REQUIRED] Input file 2 with reads. - - - --ru-in [REQUIRED] Input file with UMI. - - -V, --version Print version information + --in [REQUIRED] Input file 1 with reads. + + + --in2 [REQUIRED] Input file 2 with reads. + + + --out Path to FastQ output file for R1. + + + --out2 Path to FastQ output file for R2. + + + -u, --umi [REQUIRED] Input file with UMI. + + -z, --gzip Compress output files with gzip. By default turned off to encourage use + of external compression (see Readme). 
``` ### Example ```shell -cargo run --release -- --prefix 'output' --edit-nr --r1-in 'R1.fastq' --r2-in 'R3.fastq' --ru-in 'R2.fastq' +umi-transfer external -f --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fastq' ``` ### High Performance Guide -If you have more than one thread available on your computer and would like to process the read files as quickly as possible we recommend to use unix FIFOs (First In First Out) to handle decompression and compression of the fastq files. + +If you have more than one thread available on your computer and would like to process the read files as quickly as possible we recommend to use unix FIFOs (First In First Out) to handle decompression and compression of the FastQ files. This can be done as follows, given that you have your input files compressed as `fastq.gz`, first create FIFOs to represent your uncompressed input files: ```shell -$ mkfifo read1.fastq -$ mkfifo read2.fastq -$ mkfifo read3.fastq +mkfifo read1.fastq +mkfifo read2.fastq +mkfifo read3.fastq ``` + and then we use `zcat` to decompress our input files and send it to the pipe that the FIFOs represent: + ```shell $ zcat read1.fastq.gz > read1.fastq & [1] 233387 @@ -87,6 +98,7 @@ $ zcat read2.fastq.gz > read2.fastq & $ zcat read3.fastq.gz > read3.fastq & [3] 233389 ``` + Note the trailing `&` to leave these processes running in the background. We can inspect the directory with `ls`: ```shell @@ -99,7 +111,9 @@ prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read1.fastq prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read2.fastq prw-rw-r--. 
1 alneberg ngi2016004 0 Apr 13 12:46 read3.fastq ``` + We continue to create corresponding FIFOs for the output files (note that the filenames need to match the value given to `--prefix`) + ```shell $ mkfifo output1.fastq $ mkfifo output2.fastq @@ -108,23 +122,31 @@ $ pigz -p 10 --stdout > output1.fastq.gz < output1.fastq & $ pigz -p 10 --stdout > output2.fastq.gz < output2.fastq & [5] 233395 ``` + The value `10` is how many threads each of the `pigz` processes is allowed to use. The optimal value for this depends on several factors and for optimal performance you will have to do some testing on your exact hardware. We can then run the `umi-transfer` program as follows: + ```shell -$ umi-transfer --prefix output --edit-nr --r1-in read1.fastq --r2-in read3.fastq --ru-in read2.fastq +umi-transfer --prefix output --edit-nr --r1-in read1.fastq --r2-in read3.fastq --ru-in read2.fastq ``` It's good practice to remove the FIFOs after the program has finished: + ```shell rm read*.fastq output*.fastq ``` + ## For developers + To make modifications to `umi-transfer`, clone this repository, make your changes and then run the code with + ```shell cargo run -- ``` + or build the executable with + ```shell cargo build --release ``` diff --git a/src/file_io.rs b/src/file_io.rs index 7e71f07..f8557db 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -77,14 +77,25 @@ pub fn read_fastq(path: &PathBuf) -> Result OutputFile { - if name.ends_with(".gz") { - OutputFile::Gzip { - read: std::fs::File::create(name.as_path()) - .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) - .map(bio::io::fastq::Writer::new) - .unwrap(), + if let Some(extension) = name.extension() { + if extension == "gz" { + // File has gz extension, which has been enforced by check_outputpath() if -z was provided. 
+ OutputFile::Gzip { + read: std::fs::File::create(name.as_path()) + .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) + .map(bio::io::fastq::Writer::new) + .unwrap(), + } + } else { + // File has extension but not gz + OutputFile::Fastq { + read: std::fs::File::create(name.as_path()) + .map(bio::io::fastq::Writer::new) + .unwrap(), + } } } else { + //file has no extension. Assume plain-text. OutputFile::Fastq { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) @@ -98,27 +109,29 @@ pub fn write_to_file( input: bio::io::fastq::Record, output: OutputFile, umi: &[u8], - edit_nr: bool, + umi_sep: Option<&String>, + edit_nr: Option, ) -> OutputFile { let s = input; - if edit_nr { - let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); + let delim = umi_sep.as_ref().map(|s| s.as_str()).unwrap_or(":"); // the delimiter for the UMI + if let Some(number) = edit_nr { + let header = &[s.id(), delim, std::str::from_utf8(&umi).unwrap()].concat(); let mut string = String::from(s.desc().unwrap()); - string.replace_range(0..1, "2"); + string.replace_range(0..1, &number.to_string()); let desc: Option<&str> = Some(&string); output.write(header, desc, s) } else { - let header = &[s.id(), ":", std::str::from_utf8(&umi).unwrap()].concat(); + let header = &[s.id(), delim, std::str::from_utf8(&umi).unwrap()].concat(); output.write(header, s.desc(), s.clone()) } } // Checks whether an output path exists. -pub fn check_outputpath(mut path: PathBuf, compress: &bool) -> Result { +pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Result { // handle the compression and adapt file extension if necessary. 
- if compress & !path.ends_with(".gz") { - if let Some(extension) = path.extension() { - let mut new_extension = extension.to_str().unwrap_or("").to_owned(); + if let Some(extension) = path.extension().and_then(|e| e.to_str()) { + if !extension.ends_with("gz") & compress { + let mut new_extension = extension.to_owned(); new_extension.push_str(".gz"); path.set_extension(new_extension); } @@ -128,7 +141,8 @@ pub fn check_outputpath(mut path: PathBuf, compress: &bool) -> Result { let exists = fs::metadata(&path).is_ok(); // return the path of it is ok to write, otherwise an error. - if exists { + if exists & !force { + // force will disable prompt, but not the check. if Confirm::new() .with_prompt(format!("{} exists. Overwrite?", path.display())) .interact()? diff --git a/src/main.rs b/src/main.rs index 5a00c9e..ff163bf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ extern crate core; -use anyhow::{Context}; +use anyhow::Context; use clap::Parser; use crate::auxiliary::timedrun; @@ -14,8 +14,8 @@ mod umi_external; #[derive(clap::Parser)] #[clap( version = "0.2.0", - author = "Written by Judit Hohenthal, Matthias Zepper, Johannes Alneberg", - about = "A tool for transferring Unique Molecular Identifiers (UMIs). \n\nThe UMIs are given as a fastq file and will be transferred, explaining the name umi-transfer, to the header of the first two fastq files. 
\n\n" + author = "Written by Judit Hohenthal, Matthias Zepper & Johannes Alneberg", + about = "A tool for transferring Unique Molecular Identifiers (UMIs).\n\nMost tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs.\n\n You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files.\n\n" )] pub struct Opt { diff --git a/src/umi_external.rs b/src/umi_external.rs index c144b5c..5f92c1f 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -8,9 +8,9 @@ use crate::{file_io::check_outputpath, umi_errors::RuntimeErrors}; #[derive(Debug, Parser)] pub struct OptsExternal { #[clap( - short = 'f', - long = "fix_numbers", - help = "Automatically change '3' into '2' in sequence header of output file from R3. + short = 'c', + long = "correct_numbers", + help = "Ensure read numbers 1 and 2 in sequence header of output files. \n " )] edit_nr: bool, @@ -21,6 +21,20 @@ pub struct OptsExternal { \n " )] gzip: bool, + #[clap( + short = 'f', + long = "force", + help = "Overwrite existing output files without further warnings or prompts. + \n " + )] + force: bool, + #[clap( + short = 'd', + long = "delim", + help = "Delimiter to use when joining the UMIs to the read name. Defaults to `:`. + \n " + )] + delim: Option, #[clap( long = "in", required = true, @@ -99,8 +113,8 @@ pub fn run(args: OptsExternal) -> Result { .unwrap_or(file_io::append_umi_to_path(&args.r2_in)); // modify if output path according to compression settings and check if exists. 
- output1 = check_outputpath(output1, &args.gzip)?; - output2 = check_outputpath(output2, &args.gzip)?; + output1 = check_outputpath(output1, &args.gzip, &args.force)?; + output2 = check_outputpath(output2, &args.gzip, &args.force)?; println!("Output 1 will be saved to: {}", output1.to_string_lossy()); println!("Output 2 will be saved to: {}", output2.to_string_lossy()); @@ -123,15 +137,29 @@ pub fn run(args: OptsExternal) -> Result { counter += 1; if r1_rec.id().eq(ru_rec.id()) { - // Write to Output file (never edit nr for R1) - write_file_r1 = file_io::write_to_file(r1_rec, write_file_r1, &ru_rec.seq(), false); + // Write to Output file + let read_nr = if edit_nr { Some(1) } else { None }; + write_file_r1 = file_io::write_to_file( + r1_rec, + write_file_r1, + &ru_rec.seq(), + args.delim.as_ref(), + read_nr, + ); } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); } if r2_rec.id().eq(ru_rec.id()) { - // Write to Output file (edit nr for R2 if --edit-nr flag was included) - write_file_r2 = file_io::write_to_file(r2_rec, write_file_r2, &ru_rec.seq(), edit_nr); + // Write to Output file + let read_nr = if edit_nr { Some(2) } else { None }; + write_file_r2 = file_io::write_to_file( + r2_rec, + write_file_r2, + &ru_rec.seq(), + args.delim.as_ref(), + read_nr, + ); } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); } From aa2ab199a3d10a8847e40d830d27520924eff9cf Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 9 May 2023 14:02:06 +0200 Subject: [PATCH 24/72] Readme updates. --- README.md | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d8232b0..c38d46c 100644 --- a/README.md +++ b/README.md @@ -104,12 +104,12 @@ Note the trailing `&` to leave these processes running in the background. We can ```shell $ ls -lh total 1.5K --rw-rw----. 1 alneberg ngi2016004 4.5G Apr 13 12:18 read1.fastq.gz --rw-rw----. 
1 alneberg ngi2016004 1.1G Apr 13 12:18 read2.fastq.gz --rw-rw----. 1 alneberg ngi2016004 4.5G Apr 13 12:18 read3.fastq.gz -prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read1.fastq -prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read2.fastq -prw-rw-r--. 1 alneberg ngi2016004 0 Apr 13 12:46 read3.fastq +-rw-rw----. 1 alneberg ngisweden 4.5G Apr 13 12:18 read1.fastq.gz +-rw-rw----. 1 alneberg ngisweden 1.1G Apr 13 12:18 read2.fastq.gz +-rw-rw----. 1 alneberg ngisweden 4.5G Apr 13 12:18 read3.fastq.gz +prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read1.fastq +prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read2.fastq +prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read3.fastq ``` We continue to create corresponding FIFOs for the output files (note that the filenames need to match the value given to `--prefix`) @@ -128,7 +128,7 @@ The optimal value for this depends on several factors and for optimal performanc We can then run the `umi-transfer` program as follows: ```shell -umi-transfer --prefix output --edit-nr --r1-in read1.fastq --r2-in read3.fastq --ru-in read2.fastq +umi-transfer external --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq ``` It's good practice to remove the FIFOs after the program has finished: @@ -139,16 +139,8 @@ rm read*.fastq output*.fastq ## For developers -To make modifications to `umi-transfer`, clone this repository, make your changes and then run the code with +`umi-transfer` is free and open-source software developed and maintained by scientists of the [Swedish National Genomics Infrastructure](https://ngisweden.scilifelab.se). We gladly welcome suggestions for improvement, bug reports and code contributions. -```shell -cargo run -- -``` - -or build the executable with - -```shell -cargo build --release -``` +If you'd like to contribute code, the best way to get started is to create a personal fork of the repository.
Subsequently, use a new branch to develop your feature or contribute your bug fix. Ideally, use a code linter like `rust-analyzer` in your code editor. -Please make sure to activate code formatting by `rust-analyzer`. +Before developing a new feature, we recommend opening an issue on the main repository to discuss your proposal upfront. Once you're ready, simply open a pull request to the `dev` branch and we'll happily review your changes. Thanks for your interest in contributing to `umi-transfer`. From eba0854051b56ad9499d269c93d7a64df3ebe761 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 25 May 2023 16:31:50 +0200 Subject: [PATCH 25/72] Fixing issues highlighted by Clippy. --- Cargo.lock | 71 ++++++++++++++++++--------------------------- src/file_io.rs | 18 ++++++------ src/umi_external.rs | 4 +-- 3 files changed, 39 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ecc4bbe..02571d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,15 +197,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.5" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -458,9 +458,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi 0.3.1", "libc", @@ -505,15 +505,15 @@ checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libm" -version = "0.2.6" +version = "0.2.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "linux-raw-sys" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "matrixmultiply" @@ -689,14 +689,14 @@ version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e30165d31df606f5726b090ec7592c308a0eaf61721ff64c9a3018e344a8753e" dependencies = [ - "portable-atomic 1.3.1", + "portable-atomic 1.3.2", ] [[package]] name = "portable-atomic" -version = "1.3.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bbda379e6e462c97ea6afe9f6233619b202bbc4968d7caa6917788d2070a044" +checksum = "dc59d1bcc64fc5d021d67521f818db868368028108d37f0e98d74e33f68297b5" [[package]] name = "ppv-lite86" @@ -730,9 +730,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" dependencies = [ "unicode-ident", ] @@ -803,9 +803,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" +checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974" dependencies = [ "aho-corasick", "memchr", @@ -814,9 +814,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.1" +version = "0.7.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "rustc_version" @@ -861,22 +861,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.162" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71b2f6e1ab5c2b98c05f0f35b236b22e8df7ead6ffbf51d7808da7f8817e7ab6" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.162" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2a0814352fd64b58489904a44ea8d90cb1a91dcb6b4f5ebabc32c8318e93cb6" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -961,9 +961,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" dependencies = [ "proc-macro2", "quote", @@ -1015,7 +1015,7 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.15", + "syn 2.0.16", ] [[package]] @@ -1048,9 +1048,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-segmentation" @@ 
-1116,21 +1116,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-sys" version = "0.45.0" diff --git a/src/file_io.rs b/src/file_io.rs index f8557db..bc6e74e 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -3,7 +3,7 @@ use anyhow::{anyhow, Context, Result}; use dialoguer::Confirm; use file_format::FileFormat; use regex::Regex; -use std::{fs, path::PathBuf}; +use std::{fs, path::Path, path::PathBuf}; // Defining types for simplicity type File = std::fs::File; @@ -54,7 +54,7 @@ impl OutputFile { // Read input file to Reader. Automatically scans if input is compressed with file-format crate. 
pub fn read_fastq(path: &PathBuf) -> Result>> { - if fs::metadata(&path).is_err() { + if fs::metadata(path).is_err() { return Err(anyhow!(RuntimeErrors::FileNotFoundError)); } @@ -115,13 +115,13 @@ pub fn write_to_file( let s = input; let delim = umi_sep.as_ref().map(|s| s.as_str()).unwrap_or(":"); // the delimiter for the UMI if let Some(number) = edit_nr { - let header = &[s.id(), delim, std::str::from_utf8(&umi).unwrap()].concat(); + let header = &[s.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); let mut string = String::from(s.desc().unwrap()); string.replace_range(0..1, &number.to_string()); let desc: Option<&str> = Some(&string); output.write(header, desc, s) } else { - let header = &[s.id(), delim, std::str::from_utf8(&umi).unwrap()].concat(); + let header = &[s.id(), delim, std::str::from_utf8(umi).unwrap()].concat(); output.write(header, s.desc(), s.clone()) } } @@ -148,17 +148,17 @@ pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Res .interact()? 
{ println!("File will be overwritten."); - return Ok(path); + Ok(path) } else { - return Err(anyhow!(RuntimeErrors::FileExistsError)); + Err(anyhow!(RuntimeErrors::FileExistsError)) } } else { - return Ok(path); + Ok(path) } } -pub fn append_umi_to_path(path: &PathBuf) -> PathBuf { - let path_str = path.as_os_str().clone().to_string_lossy(); +pub fn append_umi_to_path(path: &Path) -> PathBuf { + let path_str = path.as_os_str().to_string_lossy(); let re = Regex::new(r"^(?P\.*[^\.]+)\.(?P.*)$").unwrap(); let new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); PathBuf::from(new_path_str.to_string()) diff --git a/src/umi_external.rs b/src/umi_external.rs index 5f92c1f..9b880ca 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -142,7 +142,7 @@ pub fn run(args: OptsExternal) -> Result { write_file_r1 = file_io::write_to_file( r1_rec, write_file_r1, - &ru_rec.seq(), + ru_rec.seq(), args.delim.as_ref(), read_nr, ); @@ -156,7 +156,7 @@ pub fn run(args: OptsExternal) -> Result { write_file_r2 = file_io::write_to_file( r2_rec, write_file_r2, - &ru_rec.seq(), + ru_rec.seq(), args.delim.as_ref(), read_nr, ); From 0d5c6d4eda49cd1519b7fbc01b61625fa863fbb8 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 25 May 2023 17:52:06 +0200 Subject: [PATCH 26/72] Addressed the clippy warning 'Large size difference between variants' for the ReadFile enum by introducing a Box around the compressed input. Also used a BufReader for the plain text file. 
--- src/file_io.rs | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index bc6e74e..8029912 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -12,16 +12,16 @@ type Gzip = flate2::bufread::MultiGzDecoder; // Enum for the two acceptable input file formats: '.fastq' and '.fastq.gz' pub enum ReadFile { - Fastq(File), - Gzip(Gzip), + Fastq(std::io::BufReader), + Gzip(Box), } // Implement read for ReadFile enum impl std::io::Read for ReadFile { fn read(&mut self, into: &mut [u8]) -> std::io::Result { match self { - ReadFile::Fastq(file) => file.read(into), - ReadFile::Gzip(file) => file.read(into), + ReadFile::Fastq(buf_reader) => buf_reader.read(into), + ReadFile::Gzip(buf_reader) => buf_reader.read(into), } } } @@ -54,25 +54,24 @@ impl OutputFile { // Read input file to Reader. Automatically scans if input is compressed with file-format crate. pub fn read_fastq(path: &PathBuf) -> Result>> { - if fs::metadata(path).is_err() { - return Err(anyhow!(RuntimeErrors::FileNotFoundError)); - } + fs::metadata(path).map_err(|_| anyhow!(RuntimeErrors::FileNotFoundError))?; let format = FileFormat::from_file(path).context("Failed to determine file format")?; - if format == FileFormat::Gzip { - Ok(bio::io::fastq::Reader::new(ReadFile::Gzip( - std::fs::File::open(path) + let reader: ReadFile = match format { + FileFormat::Gzip => { + let file = File::open(path) .map(std::io::BufReader::new) - .map(flate2::bufread::MultiGzDecoder::new) - .with_context(|| format!("Failed to open file: {:?}", path))?, - ))) - } else { - // If not gzipped, read as plain fastq - Ok(bio::io::fastq::Reader::new(ReadFile::Fastq( - std::fs::File::open(path) - .with_context(|| format!("Failed to open file: {:?}", path))?, - ))) - } + .with_context(|| format!("Failed to open file: {:?}", path))?; + ReadFile::Gzip(Box::new(flate2::bufread::MultiGzDecoder::new(file))) + } + _ => { + let file = + 
File::open(path).with_context(|| format!("Failed to open file: {:?}", path))?; + ReadFile::Fastq(std::io::BufReader::new(file)) + } + }; + + Ok(bio::io::fastq::Reader::new(reader)) } // Create output files From b3a889082b1bafea9051788f5a86546bc2abb7c7 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 2 Jun 2023 17:20:33 +0200 Subject: [PATCH 27/72] Readme updates. --- README.md | 72 ++++++++++++++++++++++++++-------------- docs/.DS_Store | Bin 0 -> 6148 bytes docs/img/ngi_dark.png | Bin 0 -> 19261 bytes docs/img/scilifelab.png | Bin 0 -> 14568 bytes src/umi_external.rs | 4 +-- 5 files changed, 50 insertions(+), 26 deletions(-) create mode 100644 docs/.DS_Store create mode 100644 docs/img/ngi_dark.png create mode 100644 docs/img/scilifelab.png diff --git a/README.md b/README.md index c38d46c..8f580dd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,21 @@ -# umi-transfer +
+ The SciLifeLab logo + The logo of the National Genomics Infrastructure +
-A tool for transferring Unique Molecular Identifiers (UMIs) provided as separate FastQ file to the header of records in paired FastQ files. +

+

umi-transfer

+ A command line tool for transferring Unique Molecular Identifiers (UMIs) provided as separate FastQ file to the header of records in paired FastQ files. +

+
+ +- [Background on Unique Molecular Identifiers](#background) +- [Installing `umi-transfer`](#installation) +- [Using `umi-transfer` to integrate UMIs](#usage) +- [Improving performance with external multi-threaded compression](#high-performance-guide) +- [Contributing bugfixes and new features](#contribution-guide-for-developers) + +
## Background @@ -35,7 +50,9 @@ umi-transfer 0.2.0 > >The decompression and compression used within umi-transfer is single-threaded, so to get the most reads per minute performance, see the [high performance guide](#high-performance-guide) -The tool requires three FastQ files as input. You can manually specify the names and location of the output files with `--out` and `--out2` or the tool will append a `with_UMI` suffix to your input file names as output. It additionally accepts to choose a custom UMI delimiter with `--delim` and to set the flags `-f`, `-c` and `-z`. The latter specifies to compress the output and `-c` is used to ensure `1` and `2` as read numbers in the output. `-f` / `--force` will overwrite existing output files without prompting the user. +The tool requires three FastQ files as input. You can manually specify the names and location of the output files with `--out` and `--out2` or the tool will append a `_with_UMIs` suffix to your input file names as output. It additionally accepts to choose a custom UMI delimiter with `--delim` and to set the flags `-f`, `-c` and `-z`. + +`-c` is used to ensure the canonical `1` and `2` of paired files as read numbers in the output, regardless of the read numbers of the input reads. `-f` / `--force` will overwrite existing output files without prompting the user and `-z` enables the internal single-threaded compression of the output files. Alternatively, you can also specify an output file name with `.gz` suffix to obtain compressed output. ```raw $ umi-transfer external --help @@ -46,7 +63,7 @@ USAGE: umi-transfer external [OPTIONS] --in --in2 --umi OPTIONS: - -c, --correct_numbers Ensure read numbers 1 and 2 in sequence header of output files. + -c, --correct_numbers Read numbers will be altered to ensure the canonical read numbers 1 and 2 in output file sequence headers. -d, --delim Delimiter to use when joining the UMIs to the read name. Defaults to `:`. 
@@ -67,20 +84,20 @@ OPTIONS: -u, --umi [REQUIRED] Input file with UMI. - -z, --gzip Compress output files with gzip. By default turned off to encourage use - of external compression (see Readme). + -z, --gzip Compress output files. By default, turned off in favour of external compression. ``` ### Example ```shell -umi-transfer external -f --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fastq' +umi-transfer external -fz -d '_' --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fastq' ``` ### High Performance Guide -If you have more than one thread available on your computer and would like to process the read files as quickly as possible we recommend to use unix FIFOs (First In First Out) to handle decompression and compression of the FastQ files. -This can be done as follows, given that you have your input files compressed as `fastq.gz`, first create FIFOs to represent your uncompressed input files: +The performance bottleneck of UMI integration is output file compression. [Parallel Gzip](https://github.com/madler/pigz) can be used on modern multi-processor, multi-core machines to significantly outclass the single-threaded compression that ships with `umi-transfer`. 
+ +We recommend using Unix FIFOs (First In, First Out buffered pipes) to combine `umi-transfer` and `pigz`: ```shell mkfifo read1.fastq @@ -88,18 +105,20 @@ mkfifo read2.fastq mkfifo read3.fastq ``` -and then we use `zcat` to decompress our input files and send it to the pipe that the FIFOs represent: +Assuming your compressed input files are called `read1.fastq.gz`, `read2.fastq.gz` and `read3.fastq.gz`, each can be linked to its respective FIFO like so: ```shell -$ zcat read1.fastq.gz > read1.fastq & +$ pigz -dc read1.fastq.gz > read1.fastq & [1] 233387 -$ zcat read2.fastq.gz > read2.fastq & +$ pigz -dc read2.fastq.gz > read2.fastq & [2] 233388 -$ zcat read3.fastq.gz > read3.fastq & +$ pigz -dc read3.fastq.gz > read3.fastq & [3] 233389 ``` -Note the trailing `&` to leave these processes running in the background. We can inspect the directory with `ls`: +Note the trailing `&` to leave these processes running in the background. Since multi-threading is hardly helpful for decompression, you could also use `zcat` or `gzip -dc` instead of `pigz -dc` here. + +We can inspect the directory with `ls` to list the compressed files and the created FIFOs: ```shell $ ls -lh @@ -112,35 +131,40 @@ prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read2.fastq prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read3.fastq ``` -We continue to create corresponding FIFOs for the output files (note that the filenames need to match the value given to `--prefix`) +We continue to create FIFOs for the output files: ```shell $ mkfifo output1.fastq $ mkfifo output2.fastq -$ pigz -p 10 --stdout > output1.fastq.gz < output1.fastq & +``` + +and set up a multi-threaded `pigz` compression process for each: + +```shell +$ pigz -p 10 -c > output1.fastq.gz < output1.fastq & [4] 233394 -$ pigz -p 10 --stdout > output2.fastq.gz < output2.fastq & +$ pigz -p 10 -c > output2.fastq.gz < output2.fastq & [5] 233395 ``` -The value `10` is how many threads each of the `pigz` processes is allowed to use. 
-The optimal value for this depends on several factors and for optimal performance you will have to do some testing on your exact hardware. -We can then run the `umi-transfer` program as follows: +The argument `-p 10` specifies the number of threads that each `pigz` process may use. The optimal setting is hardware-specific and will require some testing. + +Finally, we can run `umi-transfer` using the FIFOs like so: ```shell -umi-transfer --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq +umi-transfer external --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq ``` It's good practice to remove the FIFOs after the program has finished: ```shell -rm read*.fastq output*.fastq +rm read1.fastq read2.fastq read3.fastq output1.fastq output2.fastq ``` -## For developers +## Contribution guide for developers `umi-transfer` is a free and open-source software developed and maintained by scientists of the [Swedish National Genomics Infrastructure](https://ngisweden.scilifelab.se). We gladly welcome suggestions for improvement, bug reports and code contributions. If you'd like to contribute code, the best way to get started is to create a personal fork of the repository. Subsequently, use a new branch to develop your feature or contribute your bug fix. Ideally, use a code linter like `rust-analyzer` in your code editor. -Before developing a new feature, we recommend opening an issue on the main repository to discuss your proposal upfront. Once you're ready, simply open a pull request to the `dev` branch and we'll happily review your changes. Thanks for your interest in contributing to `umi-transfer`. +Before developing a new feature, we recommend opening an issue on the main repository to discuss your proposal upfront. Once you're ready, simply open a pull request to the `dev` branch and we'll happily review your changes. Thanks for your interest in contributing to `umi-transfer`! 
diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4a1f78b6babd0011724029c032fc71193bb32ef2 GIT binary patch literal 6148 zcmeHKOKQU~5S?jKFm#hmAkdAjkQ=DMJ%KM!l7L&VqY!A<+Vx~TORv#4nhC+g?b5Vm z2BbHdpX>*gJVeBs*ZrDkK|~dXAd500VjgwfSnw^7rN+A1rT#cl;`f^5*=O`ZJ+)nX z_59}PQ(bSnv_)){mhYeU%a4akUZ3_qc#qHDReftthb|pjR04Z0&VV!E3^)VMz(fq# ztyA4iw4ZO|3^)UShXFYs0)}8T%!=jcz*JfQ;2h>6(507NRTvb7kj z>97Zji-uWI(}}J5VB7iJyl~ka^+O3Kj*31z1J1yZftfBBa{oW#lNoLDhbg{t2AqL^ z#sJUjrrzMC>~6h!J-KTG#uJ8!#AQ(+&<8&OSjaiDDo*tW(GeF7v!YZHdr1fSk3b>B KCud*+2EG7be<;2H literal 0 HcmV?d00001 diff --git a/docs/img/ngi_dark.png b/docs/img/ngi_dark.png new file mode 100644 index 0000000000000000000000000000000000000000..87ada3373a1458eb181754933820104cbc5c9c20 GIT binary patch literal 19261 zcmaI618`BEvHYc`iJDE&u+qUhA?PM~sZ6|N|#34SOj`ZYbk$Ul=QVM#V>B{#Fg9cKv~&E21_9ys;A}?1!N|_YOeP3NLPEmtY--N?Q(W?Y z#Q%*1$Shr59eJ6UJUl!YJ=ho>oGqAGczAf2m|2-vSsDHz7+k#UU5z{$>|MzJTgm^@ zBW~tm;%w#UYUN;0@*llM#tv?-0%Ta{}d6k^4%>Hrx4N=VnuyKeaM1pViZd?MEC$8IjGg!C?apiI-42*uJaYQ zsUJ$I7eePc2<}@=0?3O1c;0x1yv0mzzLwVr! zv83_cZ& zk=mu=2kOBY9IB=#bfhdb1rWh`Nox?#uQ^}L+D@)pG)jES>w9me$)&sU9xiBif7Y24@rKx;a zprW7>9RsqtnCS50^sy2~`A}-f^~ArUrcJ!~RVrHq4TbI`o&h{+ph{f%hvGS&a31Ro z`%a^`>qMPPI>ev%>4kf4>X9vCE#O973Xp}Ep}!39OmNmR+SYrUoE3}|)OmHixiew2 z;vng&d-4h7=e5C;wxF?@Sp3O@P)yoo3utN-_Gu1&1Trk-Pbl4}Ha;c4a$b6Q{RGQZ zx0ecG{M6fK*k-q%%d^-W2y^dHZtq*)|JJ2U6QRe7F(V^b#=yqLR@2bX(RWZaYZ0L4 z2%mEvIgZoW67HKAqpi#NvwinUNN1m7hPFhyeOzzc%%Q%5?jOG3_Q^N>Jl$Fq8M^;l zj&U3|Vi20`MqoT9Ptt? zNLW%uWsY_t5_Y4%uGM~XZT0$j8LU(7vHei=TUS3Z*9==dU85Fp)m_tzG$q_&DJx!) 
z9jmVRwhenp&Aa0sI#|(;{i33(a^3kp=@mDTBb~RDLr=53r)NtnTjfSe%6kJdkU^*A z0(KLb?8=D#OYCdep2!ViVsf%`ws$I4r8I`>70GYx-jhEJ5)#rTN@|n;)dQg2n_b@1 zVV+~x`5;@^X$34p+CeSZz?*32=EOt`X$=)a==6R(3#l+vT8!(RKfd=<)zQH(&Ldb1 za(U0Rju5mW;6G5K7*$}N9!ny>5AHWypO~27fC;j+omW$~yTuu1_I@}a`#l7%cWdK` z1-vSfNv2Fiw_p53!0XZ4OjT1;Q&&(>(EU>K`vdq}RmJ(Zu&l1>=9a1sA*8DlpYMA{ zn*%d|nh<8wt>2sJe{8Y@(|A84^eviU1sm!WadOG;!G_XE@-*7}zInX{7jCu9M7(D( zEKDzfQlmi-Tz(?9R{et^Bq?5<=n?XFPkP4}Byck^mMdkvi8z{jsXW~@`VohkhLoam56HJ9>;@5qPJk`yYC@? zHvm@kBi-G?#NBm(YCP%HiOEk#{6%EvOVm?DMrM3}GM#Bgc^zhEt0X|dPl>L^IR={v ziB|p%X2K7~S8BMNQQOqg`S_tU!jo+GTbt;6vT!xZyX*%*A&8~Zhp2P|8lTJFwwU|K zT($aywG%vwe6KkI){Srf{dLJV$Em^mG^^ni zMUHc3W~K*tBRdn*H9Z9xN}0e%ien%6{+RakJ=CGYp;zLQVBsd0>0JeZTc;;&mUBhF zeCbChh_?2;wvL8Ixwf^{FY_fO@4lRgy=KmY?fAu(=OJr0uLf16F3!pQ1yi2Kxq{KZ z@vHJw&In(S*t(7;?(R=NjOqIKnQzYFRfc|uN&AqJlMkXuWF2p~%uIopLxifI0!uQR z${b;(wL#BuQ15v`g%7-6om0+(EpQ-4?vjo-UbiOlN^*Z~l;XeM?gl4U&crYOe4!kT z&P86KfEnfavwoPl>UCLhjFhHWj1n)myP}e(>1z*grVM#*$LJ0$N2^{&`f%98g>}(G zc1cOJ?_X&x8vYi4`-HJ^ND+0I*hHE(xx2r=Mn~}Kud1rL*x1+r1`+>l((m?e=TC&* z(yN%!#1yc3&(P|t?;2cpJxkkte1(Rzt_D{t^Hj8^L?<*=yq4q{w9IG%li@ijf1@C< z7)Wsdr31^4-HxbVi4i0g`GEb1@S3LhkjmS>?*gs4yfxT;CG;}!l2lRLrzyX^6kR(V zXJM0t>5sGl9Kc7qQ_XGj!T3vmKSNImF4S|S1+RG$e=$b~vIlXwgyT&8^k5E}KxB;| zbk{t!$oF$62uU8_8TQn+cY{8A_9SyxlmwWF9o4+LrG>Po0!w@|+K>hGRj%VwYPDL; zNu6L^r~$5gY^xf{R7#xS+3L37yP45qfItWcUF33Nfw=*?n`dz`V%aBGtnD&n1aU%$ z7HMJtJ!_XjHm9l@5&HDnKcjzXOqznt9#tAJ6GkG8a$dP=wc3%~d5!nh<(_doKCXo5 zH$L*m7EsBzScS&}Jf=0hd`o)lzoomL+Gl5b1{e-S)0#uaMr8vlzWe&HOcePyi=`4w zebT}*f!RG<;$M6_ZmEXJSLXBHB(2OTu`ce~kzI?L6Xm$xq+YeX|NeKVVYV8U(giN%{=ww%rhUgDBrg~Z!Q=zNtOw$E{@d|y6+Z9=xY2+?QVx0dLSL7GNvlTW+f zrzQY`a?~Cax+BDSCE41Ln;3174(+mFk(!+ynk_`%dP*l@FouJ74 z>2keQAD)v%AOt%-jg(=6L>65Ze0-z_PvIHvQ1J)9o(+y~JO9OVYM-IvcsSl5DQw4b$Mt6nE_7ld6Z=jO7wqYf;BIs1{Hs+uOd0~CY)BgJVSU^glT zaG?n!2_KP$rL!sF_^^QEzK^(7bJht*7^`jf>#fbH-cY9$TyFh74Sg>&%0??TB4vG> zn&J3ns1K7<5^pkacS{M0utYRqMM#Bl^klmg!(pcv(gx;(M0Js6&0;O}GThH!9Jqh8 
zwDG=p>N5@}pR|*zDSQ&WpCtyX=+nHU3K%9y66bo*kUZ8PCBceDrL=#Af*%_-eU1Q? zgg$ArEIHb8S=}#I&WZeAcHx@E`QaT<;S71T-$-$kR3EmLp-d?#ar?(N1^iZP32Ltc zE$O}MdwagZ&Kjx{c+$w(&Q)N=BMCWM9EzU;LyZd!!liGeH(YDQ)M`)6MjzwDs6iAf z?AF8=oD8|in+W;btEd#N3|YX+7N4X3n1$*a&8JxoLU@o2+|u-bUj;~HZ;#<@j!f^{ zfI}ffl9-y4s-|DW!WHZ*9&d-^N4Wa%5bA7Hc)>YT1bBt81W^doAe^$!xVr8*J1 zF5>k)M9?Ort;T&TvI;>Zx3Z^THOBFo;0s7^j*R~LmESbBTxDv6=26~5KGqAj>j}D> zA@2$CxD7AQZ{<%oy6UPPIB5aV81ct zvTv_1)`g{+XT4bL7^kMLVWM^ydR|#A?WZuPnjR^2URgz51;%9MF%aF%*NjsOKeHF` zu}@qo8^VnE8}1KGSRnPJD0jSJ3r+Y=&FOvGg+aziD^y?riUD|fh$DaaY6HYVg}m;nj*O^Zj`le&MtAYxtgbrrZl`gn(MIkg|P zjW%|}ub>-CsTz20XzJvtdvH-!RA437OMW)}ekr&jcTUD@{+p>AD&?N?{Fa8GF2{P` z>eA_m1r(=0-8Z``9ej9t+&3LWGXcR;6IjpYI4?XZvmb7_@??G-T4njw^U3!YjAXze z@S13-@uWN@d_;d8us)=9nAMT5CQR}{S$0jEsErtW?tT*=YG zq}EHBK(!24JMMJ0bcDj0K&YQA#}7m|AgX4hGb?ORBpeDJC#mYez1(gB+7krhBEoF- znt*5+m4C^34N;*l7pa@4pYQmX=${JHhDxp@q91;UmxVkn($egr#G}qxzUWo+qzqi# zdb<0gg6;E&+*W)jWX%3f9J^a-DYhFVu~L?W>myNVDm_)w&I>DmH^?Lq@y!%7#)5rg zi(SN;x5wi5t1jHvPYFJivNC(Xk=8uX%tdT)ViRP?-PDo)^})E387-p1YeAU#j6#h|)|HFtTXqKn@-xEbSGZ_BW( zxK;Uj>d3b>4zi3EW?nQy7;}S>B&c3=C25U9A96Ws{!@$iMwVA+H|`1ZR-F}T_er@u zTO#v1gttgPUA(&cz3_6*mukz>1_8GEm?ZscjiWhF3a3OwUQqk!CyW=yYw?_DD)W-l zkkEoL8WgOkXNteL`4TL)fv?l9NrHg7tp8sjn23yX$vZ0M>@Hb-4l1_rwCCf7n{>Xxaf*Io<4vO*HD10MV${SG*QL9OR*UK2T z1dd+nfH|+3JdhURXSDGkOMn(_I!<&gpz~RNs3@Bz3%?ON$WhG_Dox-e>c z4fRo~`Cf=8kvaW3FrUPv4VF#O&We23e2osZ!n4W7{8B!f_Bk0+JoV%+{J`1tw3|&BAh`GQ(!osr^{jHvaL4VJqT=*8^9%*^h~UbRUB3^t zxPD}xCCU%o*N1+w4hSn+vo8LtSoEL@do!*RnvZ$}Wz1}dz_~zl;;d91Q?)dQSCc*q z15uWT3`A9Lp7N-tUYn5y|7GOU_MJa@Y0y|Id{*}F81x7Z6EIX6x^7h^LeS(*5~0gC zC1VoAaDg+SUh0KyO$8qB7IWX_fA+1Z-G@A(n}IFVe|Z3LB5)Pg zpvc2oG*;k)98x+2Ch4T8q0F5!+r=Qwmm6`;6d5uDq?;Q~=hC|h{^V3ZYeQq8DD>>; z&wT74e8k!I*lcs?sgx|4_o9=ha4dLS?Afo)Lq1}IzOwbQb?tpm9it2#lRW%YcdbG(>~pa(;;hv4>N>_Y4HPG11xD~ z2_vr0GNuC~KxtyUOZm0p0kU+>(G6UlZQQp4bU6Vfl+Ejj z^6N2+!zP81N}{93^Urw?t*)e_*$0P80t?bVj%YL zhyZa<32_hp=KkYh_G*dU6PT=vu9cNUosO24w)QJa?quoCgIW)9&y}p3aA2;Fc&*-; 
zvmnPL%!~6@`hZz2ws1>xOplkQRB$Nd$c%MF)F+Yn2W_3l~II4%RF?rFfvwL=j zxMVWfIT-M1Tt6nkg?z~)RaevL@zfE4i%+2LDH|s zpw}Op>?T5a)Ebi(;YBGSbdb&gXMVFv2#tf{Yg^E7Rp@TLsWVF~6I?p8KbQFG)$=SS z8V*rWV1C+S!+F*MPN!>`Rn;I~=HXOTN#L=;9E+M7*#U!#G}KEw_OC2*@F3MX)`Z9P{ZwxMBsXC zT&QSzp;8X#hjrglycY(IZ2iNvG%l-m&}*&{kcf^L@ay z-S*o0QdQC$0?HFlp$n?RV9n^>-h$PR)n_=`I^A>CyC&kU*7>i$+eW;e_!B}} zTwZYSfbUu^2!m|D6c<`a!9sWq;U{6xX6#W9%A11NU@t`D*^f`PyH=3OO4d=4*51LJ zYY=kC@=UPb&~EXE)8RidGp*ShQ?a&3hQth6J3Ow5QenfItSzyAs4vwk@zS zrF{wf^rhFjo66B2>nz6`k>U2eN4!I9!ZRi3zdq@~iXJ1*O2lgSyWz#GRV+Kp)U|ye z8mj?-w#NFRzD`eSXinlP;D2(igeZ`ij;C>M4(ALta;`5#~J*UM^<5)?41 zD`0}>W8?`JV)^Q-QgAF2ryxI~h3dPynwUiON3|3T%F@eg@iyG5CnBI8`%I1GyjqeA z8ZYH*=vQ4yeCYgSzW+2iW{N;1Qm9d6rw^ON&wcoD`A|pVTNk$)J45f($6PrcY91HL}U;4ew3bxf^ zLw6ydEb?t;s;?ZxNuU`H^?39_S_p_19D)A&U&Lyf)7kW86Z|3a=YRP0QMvfjq|8r9wVnR zn=`80aHTFz^?40zEzp>%t8CO4#T5ZeMU5yO_^4UmXiLpGPAQlfBP#rNf|r?Eno$fq zgVGFU7nW-nTYj^frT_k->9!h`2fC8FSysy61u0h2Yz{PEAOtMDn(NhHch&N~uX_^G zg*1|%*-IiFUo_54htpiXUv%U8IX?Oe5C$zwh|tU(nC{qAe64i0Ca9?9Xtt)zYiIDkRhMi3~&!2#xY$2o>zdRr&h zBFobHB(lT;^sNl1Yd%?{GoBXkPIU&p+=t~C0bI8pwZ&qe$2)R}uOE$Ew8DJWVtlsY zL4~Qf`S(mc4ODTDrdW4g0$a_wYexZ6L<4_^&5MX_KTLg}H$5_auG%daV$M@g9NSsT zJ+8MnU_F63Sl1nPhV`h$oAunSt&L`j%=T_WQ*EHv`&hTNrxsKr21l39@-?Y4p%U2( zI21s%)N(0**%i#h1i`mIOfbIVO(UcYJVy?{s%t8WU#i2-qpNMOU7Oa(Npg0x&d&#w z&DCNu%9ev4cVG`R>$4gd(o~zTgR^H>2FpFGE8+|^iz2b2iPEJ{)!1QMWeX@B(_j>` z+RNtIYf1C)EL-)L9Y9EjNLG_QgRBs!R^wQIdocpJrfg_xI;5A~PzO;M5l)G=ZZNiJ z4hz|LemdQ8uM#sM&P6`_WfNdn4Tt!PnzO!?@M=Pwg=Nd;Zuui+$^CEaO}TzfsJew> z+!{$Y?sSHlzWHzSCb>l!VLxEuq5F-ncm1yy3zqs{**0k;1b8GEjT-P`kZ1_`2btX0 zJgQP%k>&7g84Pv8Db_9d>@jJ{Rs zgTL7No`rL}%#K$$t&s+-^@}~W$U^p&d%uW6nb8D2t~P3z<1WU*c-0t_XJP=JTA?;U z)!zh=+S_)P7Mu49Qgq146O(mzN=yql(2ffz$1{vov8!}xOX=VB^b5sRnCNm605u!O zSMah?aR!J~YV;^jPnfP|O`zScY8qEMjpYZowvxxB*@;6V&b$wdo?%fX4cr>Yqt?DN z$oj|$)PV2~2lUt+sNWvMU-EVMUK&Ppwd&dPrjpXXYmc{B{9fCc4~PH6=7kGyJQ*r4 zHF|juK1yA4qEmhe$R5$3&80Q96j=TC!tu#(lKp7ZTEU{Q`l9VrNe*Ny!|`IBR4k0m 
zo75W0w&AK$WBlRI;duFrX=Zf6i_m<-qYEc(O8ZXz?(OFnREFxcQN|+{UMGXsb7GAR zPx?S2Amg&##EWXK&m)$&q+hh4IRWg>Xob} z<)4-bwTQL-@P;H$FTtCxAgA{QLVQEK_Ix|2LYBdODFUYG$C3Y%-1=mE}Azk6!~w+fie*dC|?;bylV?J@lEAeL(L0%Quw(+JWv@Sr~B*^ zRWbvo@$)@^1B*exvT9k1FQMZZ-bd%bCK8)Fn-38k2tE?U#gXxOz~v)`fKR zW#mOVXG0P`nCq95lj%>=&8qM?!7M+=_S`pU%TuYl9j?|mPgK4 zgCg!WVr;Ib#%4nA*WGSkOJ$tEr@oiI`g1IPQ=PUEyA&X5i-TMB%b#_Ij=3vo7@g1` z?vjUY7#SuI&cNUSoG2P&?ihbNQY*1R4Pgzgdq?P^gloieCj;LA}U;{kHZRk!&dV~<@r!M2;g$$r@26q z5^+FXFJ5ax{Up?94MY5!_P(c<4<`tPFXt_ zmQlOax=%zv%v;q&cSG~qUOZ4EEWTQ$0<357RD_b>^6nBOeI+m;C_j10)aMR?_k zTu!P`j_vET*k%@*%r~Ie9mMWX1jl`U1HkrHOenROV*zL`gB)z+Y?E*hg<2$&&S358 zIh#}hNC|`+LC`P4lJTjjyGF~C6L5oQah>xhAy!k4}&czO? zToytyTg4|8C=c|d%Eo0_&Qsr#EjV^Y)?2qUy(QsSISdRnqrMCX9?AR>)H5TtL6oqeHY7|5v(mBOu`lA zGR=j^{dRaP?|Z*LuZ4c#@*+9Oa856T@$8SpB`CW}iD9YC*m~$B1<5!pucVe_65?dM zx?xE7-p zFm_*qwlgplH(I6ib2f8aAM*OR|DFD07!Zlxzw6}q+nKouGND^Wl_T68F7djB#-?x7 zE5|X0!ooGE#8BBAic??)r0MS30xdDr@dBTgAreBM8{C%jZ?`{_*LJHl*tMpF(fJGK zqk8v~h5Lw+bQL^wHT07K#(Lv1l>P+us^-c*zg}}7XWa9rTKDA5K|-LBYqM$Ee2^dP zr3(pL5TJ@UYnng?$7`g2ql}nYB8eTpQvyGQw0cooIK`Y3GG!D@uWcBA1lKu&#I@>3U`LFVktQgetH^_;m+pQWcD@A z54sG?eAC_+PwoJLa6awm?Im5lp2ZPMPCvqB1W}jO0C#-<@>}#7Z`Vd^B>~Evc;3FS z+S^c#QAL?zUKdTuCe9%Y*VceO&*t6D(zdqkEOCgizGpCkPZc%v)BD<$A-D7N(jV)o(KBbZg?>%rCpqI`3{YFeS?ntK<)U^M$RNV++Bpn zD>))tq7{gLA0FUJefoOS7j%SXH@OkPzXuFdZKL`JTEM$0koQ6-+^`&GeSw!tHXR^( z+Y1=RubNk;{#9M;QJMQ;FDbnu>>0>00uRVIyKkmnAuds%3EA%}j}B{*SDe!^FIr>s z?OQo!_Q@01tJLZWDEP=flP4EyMr-Al^7s8919ufT@wHFfX{5s!-Y^4RJ%BID6TW1O zYyHaA%qc3A$dE*;HhCL*+BRaA*I0607`$+gh{uKd;DB8p$haAGkII>I0m%oM-uHzT;iJSQc<=Q@al9D*- z11gb7b~BP0#++-)Ge)u*Y#V*1|L=1o^qg!?$AI~XKHONUg6+|IEz#KXT=6n z6lG18B$>{H(Kh0*jmxX&v_n`^Bl!I?*R@0tpQf+iIWFKklcwjTj7{UwKLeN5|k5kxwwVO5OVjRO@TOcwF5Hy2UP9kPc|)M>gF zcGW9Ok^BuzSI`wngiti%>Oj8~u!MZ@>wL3PqZkb02CGK0Mkh)~f9pahz zY1j*@=+JC8Y@n>6IvBC%8$n>AB>?tnx1As{fOTODLCHQFO}KGrC?>X5T}N)@w?onn zq6Aljli}_9mKy_`UJ>z!DD&SvXy<9Cdug8$8h6fAaZJo3%nRjRFV#ja?;5N>DsjX< 
zUw7FxjFv;Fj$1~x>6`6*&~nEP{#t;Fne=jeX7I=<=Bn{wGv+d!n(#b1hZ^Q=|oE=CC!(TTOVy@m9u*7Ru*9#pF>69+8oIb zJ_mN#SUi(LN}qR(JN=;}A&t5kU_3_4o8&}}?3#egh`l&;Mju6qpmFtMxPG5EQZ;8# zP)D5eF)K5Uzj@$6Cg|?YElP$hGSsQKJz8%C!m^&@mX`%f{EQo(*9dL^FZFp1Um1g5 zN~#9?M@=SOgr38?gMQdNCcR%Ju#>dQd@fRRrzGsJe6;MVLe8Xa1uM){H}hu&%%re< z>SB&oib>u{i61k9P~sia<0(*Th>g!^NNNA0Fva;=^~%}OVt#HTAEYtiq>58(Y_wmZ ziOb2HeM&wn2w*E&r|q=H)-y16=~8cb;IUEE#>Vcm8?oW+Zj_okpoRKZL7LBKjeS4Z zpud=-zz|z(`!se~*&SX=?09_5SVIfqk76T@8xD<%Hu`bAP4dA~aU4#uTU{9s?CuG% zb~+|36tahLUcnT!>t)^5s!l$RNSN+s)efpadA7AF*rk{r)n|=Q)F)S)EJd%0YczP?!F`dmb@4cQlHoF4;N)=X>Ga<421TaqJI zHER-euGexp(?(mQ{=FT4i$>D*n4_yY`M;{FVv~7mQgvm1A+c9Uw9d< z*jCP+^^Ix>-MkAgzULhWhj!h!ZTdim{FaMxcRe57HnYnKD3 z_++h|TCzL_yqkt#BWH|`h=k}Fgyox!vl=TW&DFJv{0R z%oj)sx8W7}TD?b{3=+d0%`YQ##|JX!h<>=8y8(J*t*&3`qRu6?9IWwjc89tU6#44G24mqVGr7?5$QwZ zxf8@xa5$$4mdtW%4~pG=U!UVN1FC+#mc7dP%Ew`DYbE7(Cjt#belCNwT6g86Aeo~FTMq{<)5$iI0U_ zsD<>!Syn@e=#MAcfE=0a{9$nK8B$nf96H!M<0J5eInm~&d988v z;duD2R*Iisa#}B>p33NHPeZr*U?o}NUt_rE^lgfBL>`<)`M4FgmbtuW)=A;yX##1`8#5HEzvm@&^fzBA{jmV$f!H;&c(E?RbJ zWnPDl^Lb#Z`ZMRDI-O4d?j8qC{}8iVt>3RuW-{Y#f| zWNzF;!|84!R{i>zLtrZH9N9vBIepW|wIBQT>2^M{nZv|$#e09vjaqGT_FPG15Q1ypCjv@$Efqv&EJ3Pial<1Ld6CXEd4G? 
zRPI>ZiE_hWkVojoJo^!=c^{>EwTfqmR%xlETl0PxoJ_>-hAdgDP^@3GFLIJ9_ePbL z8)ujn8WhP-tXCxfO|my_s3Jb6eC1c|IOwaIIqHKnQjRs=s?(A6j4t(c%@?Ko=+OxE z${7V7X2lHD2o)e9j*@9gXBh-kv&2Rs7r>m-r{$2mpGCmt; zEe|)^6To;)7&8=*nssNpuuljAB>Xy4U!1I>IR1P&7xaGr@Kl{nYMYs)9x|bnX%uE=r zVj<|32Anek?fo>rdcF4d9X$apcS2f*y;K{`X0^p0nQL%|SQi`|n!<|8)zZKA`EDV1 z>B4-x1?Q=cuFspzRAzW&IAp#u88A05>?SE&J>Ag?r+I9-adR?K>483vALQrYs6BQ& zYZ_K#MK5PyJYr}|YVzThFTF>~4FHVZeWgWJvpJ+#I?T@LoK$gG%iIf}B6Q4%#9{u4 zE}F(VVN39*r?Mxo z0+nwuO0kg(hKNgV-Y40c0~G2&!UqNjfo|Rf$eg;j@2YBL6ptI%zZ8CL4D^mX;*a-zywY0HU5hE$|LN2)+zhfTT*JQ` zioUTOib869;dokb4rk~Ct}{vF#1~myJ>XW-8YX?c+Z(=(4C zsw@bN;6OynS6Hg0wlQ13Y+uYtuk%Q7qm2(T-$#IO$HokxUdxsGq8(x*9xxb*kVMXZ zD&Hm$s2%F)yWaKSw6%bz=pP4CW~5#sF00?C`c+PMnI35lb0pmi+mb&Ui8w>L*_8^R zMS@{%;tmNN@xW_4?)yoQc1q3!k1ikJgZ5*Sj}*Qp&F-)W{%A>j!LqV;{}X<@w7&g% zTNJGM8A>5vRANWu&6I!pYj&h7cgbA^p)=$Vtwv0(Xy@iz`3CWixOCqvekceBDtuOxa`W?aOYYz8F@#1vX zNyO#q$WW#G)wvp975{uryHPStAu6Yyg zwpwINP>`FFX%O&;{kM!S~H_dIkT zqFKSwUQIh;qQ(|!1EJvZ`OsvH;ww`b9i>ua0o+=FSs}501Fl?R5$Pv10u$Pr=Z4NQ zgQ~N^j0;d(1ENoHDXEcb@BVSnWqIzDo=vu~hM#N_P9QkQp0j68R3R1oGzQ_hzcv+l zm4UFVjA|l*BrOp6Xm@v+1@e;#R+xk0=HB72Xmw<0A)CGincBYzw~F9+-1MQVE?$^p z6L+3W>I2K@aMiaju$`t5y58Y6L85CZTIg0Ew z{l-H(g15f zb3*J*1+ax0D!qp_qt;xo0sHBUP(SVCD8)yOwAv@H2eHO={S*f#PV=7op{~`h46bPf zs8n?0cf%f*FwI4`W?up<&?u`kF7!~AT9QEju{-qs-p3`KCX zx88MdpO~23Z`rCuQJqd|$r@~JgSs77CrlwN5xmc)JTj{??7VAxf5dQ1GM?k@=f+4( ziH)=gjcp07e9JxK+f6C{R&Uv^X}QvF(_!X&PRu%s_81{tSFoThV2)@e;SSt~@E03u zcp*SuUVqp2>o$j`+ncHPMLJJd=w^Y>9LF26NL*RuV3*}c!9uyr>_!mWaqwM(ffHx) zr>MQ@JOri|7;Zi^;U#^sv$d#5Y9B~Jh{u*xjJm&Vs&EFIKMf90n{oaf=8`U3YzB(N z$-_$FQ#`^^5V)jp<^?GFcg`e4(rxKboFtG3*Yg>9WUvJJwNb zChrhuylEIt;{+|GU0<@LyB?Z8D%)PKJK(OlfEwt3L@z-_Z0UBa7l?G${`7i#VOoOH zV7-qk?Yfc+>pNv@03d>`$B*mUu_ zkUv^BBoHkI4^j1cQOLhPl!YPeO-JuFXddWs@Y~fTY-4ZI8+N1NMeTYkg`;n-Rx6`&QHL z!^eXoZ_fPZTZ9t|;8Byu>rg#BQvf2>60xk+^)pb6shUn-UGZ(aVvl=qzGxo=P$I_d zm#S|dWDr-2j3*z2IxP2m$4)y_fD;}f!(`lFT^n&O8(tP`J1;mN57Xw!9+SA7;2pl# zNa 
zsxy@#=PfrY4c@859U77_8IdErCdWfx<|EViU6DSb7{p}i++Oqh>(m5oyS`<9S%F>O zE~+=oVJWAi)O~V8w5Hb%6c;&3l##lZWb!wIz5!;KT#Fu~+5r_riD8C{_1}pHH59rm zjKrY>>o!s1{3XxKdD+9cq!juR zf(Z=LpseQ)5`6~a+k_v2Aon$qyd3zKqg74P&>G-S56m&M+}nL_MM*sLwnA#9Nz$KU zm^z#r;zpTTN<^cp!8@qhQ+<(|_8acqY1_~Y5qIsP~n_4mFYgW{_ujG$cvkA8OZ zDPcb@u>~+JU+nq#GD9ofR>b=|G>Bf1jLo3M&BE?k^ISt_le|Q%MZkz~7Yw?GQDPox zo}7Ts-_Hq7JA{Z$PCHCK-lrh>pIW+GovwQihs*EXL33b*A-&y~Q$sV&dY@^gbWUM8 z{rQS~5h3eV|3?573hMRRD4^{-2&)XcOept;(Na>Ho0AGtmmmB!!;Jt({f>s#9s6Qd z3dcr{{x@SB=#`vT`AtO2vyq(*HUs!hFV5ZRYE;QiM8KCqQBjL?KQC`gVUxsP4 z=dM}gLs1H&>i_@<|4BqaR380wu`3{M^=5qmk(I*-?W=9aI~qh zcGr^bTB^JUCM#z4IPgD-&5X(L#HXPBU0NIJM^LvI2J!P#&22<@k(Vc_TZF-R^)Ef#G@i^;vVm=y-w>A-C2pvR+RnG( zg#CO_*D}t~Ia0zbNas=wAOzE4;9L;|;V>uZs?BxFmMt>^!=N@2oMgU9(+q;sqSX+e za13IPr|y4|w;RoT@7Cw~;G7sxG z&q#!0ez>Cgki0H=-f(p|aNSWoD{h;)e4lV#jTY;t0`|y=~(~gGa~&O_YDK{u;Y9QkWS7yq-Wv^tm5>F6eE9?{#eT2yj{e7r3R_Xi%SZ zAj-ElV(fxNoli&g{_^CRE0+6$!PEH_+=IsrwLb+%$ATIAY#!*O3xNRD*-rs+O;!yd+lxjW1?VJ{?o*QIZBJoiP>1iCN?B zJv-{qh01LTh040FRRI}}_x zxqSlVxaEl_W_jhi&g}!UqRvN*179uDwb*@(nQ0dNE~8#u+c**bP}=#}eE|xXv-okH z6&u!XU7IX@b7oHd^OfOEuW^0-4Dvr9-NnPYXXb`>*Bsk^GY6k30)Z#@j2bQpwqv_m zbzwGrO?&zktPz|?g{aF21UpoFv2k!92fB|7j&$7?R?z%|_A-5EQ1(d)I(@kU{{aEu z$WHupo+7}d@KF56T&n`&7No*(j3s5PuAGWdwZFWKn5Olnvjn87l0~||T$dDCi>PmA z@hBJ(RT{)Fj1uJByFttO_%Sy7vm|weKrsm1jta zD^NBod-8FuDTeTL5{21>@zkf&){udFts27{|Ff+#yj|0rV?jI?tN19x)3Yd`zq zC%tAb;?t7tgK=|>BQ#}U!2Ik%@~i+WXljRUT?ViHX4$E=^$~qm^lHAE$~`aZYpT=_ zitb0>PNOe6dk~%$riJEq^ z&3w!C7Jg;Y860-%<}qfcFYGhLX#1d_8)80xEP$Q8bh!9!Tm#Mw19X9{XK~OeL_8Q=Htfr z`jFgK#&Q&<=DKA2Mo`s;>sl3%)Q^yI(JxlZCu4bMiLGnByM!~ecI$AN#N;G1mA9=p zdFG78g-ffDV=fvJOM;wMW=yBlMF1)SM{Yt3#90JuTMzMVaMDxdX{8%hN4-wa^l9!u zi{FGF@2)4`fN9)!^o6471Lrf3(l0S>TkzkFx~0tzA$~pepMo!`Uk0OI4sG5e&7P1P z<8ec}{TUPML#R83!8-^ta|g@eH`6=|ID&eXaw%6;T2~t=4&DpwYBzBNU%NNNBZ0af z1wY|^15N4kgodV$5y2{&$t+^MigJy)l2x3Q}`)XFqPW&1RZqR>lI(Zr+IZ ziUT%!I^=A{&<~i^l?%wC+Sr&d_7-e^!w7Dk1`p@b+)ZxgH-&TKuAg3jG%;aH!?Xg^ 
zsNHm!rsdP?P7kM-Nl#apJU!_N`p*ZYmb@oRW=FJs17$>b+~ntrp2M^YXs}&%XiBO zHa$I==i=hg*WH`uyK&Vqg?6aSPWGYMOb?7shnuybI<$L~_%3d9JXMCLSszGq>5!^3 z373kiZql0JLaJ?cubSW!BJO^lS7lwNH08PRbU%qlqc%u0IH$oi!`0188*?*8$v(Jr zZ|1w@TJu{S=u`oS-GAMDZ*Hf=HeV|Je<<*~j`QvQ{pZtK<3lLf+}g&K{pxC%{~?tA zuQVgZp<#EtG)1Pg=r4_s7&O_&kG3n}w8jI?eSS7gc37LceM#@{&ddsKJ-hr}6xc<9 zT@=_wfn5~XMS)!u*hPU|6xc<9T@?6#i30xzzpWb7J_~HV00000NkvXXu0mjfNqm<- literal 0 HcmV?d00001 diff --git a/docs/img/scilifelab.png b/docs/img/scilifelab.png new file mode 100644 index 0000000000000000000000000000000000000000..dfdc165a8d21412e18b9d4db06c4c1b882860b88 GIT binary patch literal 14568 zcmeIZbyU>d7B^0(ph!22NJ0Sy5f8XA$Rih?d08u}sX zyDJ_J>aSr>Y7H71PEmlqk+-fjj0x)LZtvs*Ve z?NuPKxs|Y{S|d5Q_U*WQ_~iKeN#MP=s@mVS*48UtNKMUSNOJ%kB5kevxV(C4yHB4Wyh~D*?wqj_;O| z1T-y?lhVD6)2g93trGV_&HbC5FPn2WyyD%Nq^Z^z9v(FLQkG$Uzn=TiO_5I{hH=a~ zSeZ1v2mZ0$K!3uny^FE(xm?VH*VAU`#Sce4?eG5I6bV^{rCiuEDSJnRJ0|>9xsfHa z*)KHAYVSO6kl_W9UVnHL2`A@GchxghyCDLIG2=os{f3r@PTtjl#&BFEH1q`~l}5f0a`6Z+X1o!Fgha zVJ3tnyxu((79*#a4fhqI-=r}Q*qREg3yHMMFap-N*|*Qw^f<~ZGYTtFOSMg6f6;0F zdYtjagV!3+i`)GQZ&Z~-vCddQLpDnJ*+v3FZDjT2P zzGoC)I*pI|q5JWjy;g5Z-u_pbG|IBp+0cBUbE{{JMQ+L$c0}_1Qbh%{nune^UG*eX zb-id<{prykjlIS`QKC9AX#rm+y{R!Hx9>TBXM=_EpFFGkK6TSc8_H+M<8y=M zwW521yBe~#jXQSSFI@WApQ_Q#Pqqx>ME*T8Tr16*7AHrvs!{yoH!=T%QFKB?8iZDg z;j?|od#x8jWtvP6)QLZN?Z(H|e`=pG&2FF5GvLGSDOxkA4}Rz2YQr zG0=28yH@TxEvBMontUg0^h9dXb?|95&kU=V#dNxB+Q)m>jqk(EUIk_i<*3=oc-l^@ zLzp&+j!&w)R|wDteDjbZQS^V+SLyY0UW~Qj+{N=k$#4KndxaRMDm~{2~|{ zaCdp7Fh6q(6s7pOm$|mT@zocvB7brG;V)W-YUR{?OypBfeP`a`S~o}1Q`Z0Z6m9my ziEaX|VMS7i<&l@-$2Pv_!S}tRZB+NlM(U36(>+EU_qs%%kGJKT;IMEsu&Uix=;tbc zg!M}(KcLK_SDV*ZNf5Q}FyCXM%9uA9#wIPOd5O2KPpT$lFy+7&BVY3IOkesAH10y% z&#-&HnE0vj0n1o-oM(Yp(wwe04To@xu2sn0o{F|=F9xa?VfCp4VUj|S!F0umcsU~v z8R@7*k{7r}?7VtW&+)Be2N@E}{mi+^q(WmgWkT~zK1d>?9f&JKwH`ndFd1!5eX%#e zNpiR^nO^HL6x(V(0~&Y-z00~!H97ixg@N1n5#D{F7v1Aq(`&mM0l}2?W>;M_AQQiv zQm>IbARy_Z8tCROW&kNdqTNr;KP0euLG zEt1ftDmEAhP=-$JjS4(BFUI7}7CB^H^p$=>8Xz1wBTK*AGI#l@u_bKWpq1G1)1+D` 
zl?*#g-yF6(nct0n2TrD33@JZl3)Y>xg#pj$5bA(vX6&aiW7v1AmayG=mY&QY=?1Y# zzuXyG@*#Ni2|X0vlgoukyLL6!5xka3WKCm0>f@xC!nBWr`P!OYGjQ(%uZ;|YvW2u? z*%!PkbE*ap`(P}?7OuO-RztCuIMMjpuH<7FD|&7vhRh5bYid_K;^@e#q_cqY2n8H>SU!h=RbpV6gOwZ-Oi zll$>szjrLuK9v)kBB|0YL)U4kR|5d! zipPQMiZv+|6zOUR6pQaYi8i2D=edetB~ndDe2X-J?%7stA{RfDMwT++g@n~Nf;bvg z;&0RlxNRgrJKdiHlRv}4w0VL3n^d@hv%-4u^%-Bw6J8s7%gQj?;Pvzgtg>+6&+q#r zqgn3SNc7&!2&t371Ex7Oo@qk6UoEKyHn@7t-itoPqNu{c z2axIO$WaR)fg>~%qRFUFEdl9r<@c#XDYV0JCXXU-zU;b{T`otZlOY6Oluo=A8$zj09DWQw|=fr4UK=`-Ms!{0mTQepEZ=1pNEgv)s^?(TX=aZ`l3Mo zQt1EM!b=|&i1F$|yxe^}Z6S)j5I64!{|;ei`%inQkEhFTbL?z+AubSCl&BY~SN{L# zQdw0)`=1uK6gW7!LVveHk^LW%-cI)aBI`eNyRG?c&c7>yYW@%Ie@Opr`)^^Cl!k_w zg1fEHt$L~ol7QR(#q8W|o$SQ^d=;@4wSxd{xJ7Mjf!uiU!ru6PT|&&MP1hmqTH z5kpynV%GZBQBeTDVNu42$$LVqz1=(B9_J!E}?g-WHPnE5swVMM3HNpS#sK3jd{x72? zWX~@O5drdZivoeS11@6E4dNH%=N1+L3IJ^cAR;!PKj!^|?&WUp4YT%y$U2}bMOlOL zpx@RovH#(c<3E*wIYMrU;u8RJ^9ghF3Fz|+iwO#e0R_4E1jP9G0K9)$%zHbp|2A2I z_y0qR#BYIrTL@6?{*<8>7}R>j`_F>)7iqUV{y+TrYcT#F_JD%^Unl<+zyG7_e{}s< z4E$HZ|MRZ@(e+<3@Lvi4&%6GAql@5QM;?e9Dhz_54m4pZ9qp*Y5RQ$SvI1HR+6LN< z`j7KnR0%#*#n=lCjpxDbFS_45XV?8<}p>sB_*!PnQJwt4M9VmB4pH!#z%J>#|0uoh-b74!FJRC%+c za`F<%I^-tfZpF~^Q~k+L8)v0Nk8|lWz+#6e^c&i3w`^ zyG)`LM~wCK(pEW%k#lV3)_(j$yGh`}ntNfRpN6yBAB*Me4mLs%=lHEkVdftih){*3 z>tCx3qQ6ze3)THLSyJ>|e_hhq$cq+*!8(*`s}ns}{pp{|BExmb+_Jx0?(^@-W6xD1 zvZntib}v)kjfbxVqaw?;-xWn*hK$y$SgyUK%zrd$U`+3pP%Se33!aoE^;ha*1C(Hr(SFp zYp2@~SbFN38}=H(fMBu`D$q+br9jsUTMMR_$WvWuspENq(&R_3%t^#90rDpnL$e`k zT3{2w{0KIP7=&pg`bKd9$4Y@UE!)+66lDh9n-ZK}Z>c?XjKQ)~(L)!hX7W8&sD^?y zapfZ@Iv;KCO2DZCFB8E{|@OvQfGa%4If{3IbnWGeAKF{E{C5#@G<)`&UQa8`b?wCWA7`qtfR43aWeyt&YX4qP_a;@YR^l?~X{;^XAccyZ3U0 zQXeh%?8{@B((Bue3!hMka&Ky^(hz5;jspBB&us*FIB14>YKJ%!;Brd@TBzk|H?0E%#RjGXThS zI>%Gbo-}LPM{k+f($Aj2pt_ywyNGwYy2j|b?OZiHeDin>?$iP@F&574s#_sfy zWfk33dp68Mu=${@8Q;aI%QgmuT!sJA)!6K1oo3ok2Sg0YnRIiBlE7j8Vd(X)&&k8) zxLLKJV;RORnrxWvnC@{s(PkS!;7v@6m#7t1 zB`=SlCvXREodfX80AqI=jq;0!xYYwIaKX_lgX-PnU38Je9$TRqEn)0Ay!v<4Mz5Qb 
z-{T#LV)=0*Uwu|wt~6nHE;QPi3Fz|u_fbX8Rq>-nnZoBAC zVo)s!bw4Z&YX=`00z54dgX8GJck8d^h=SKXB0yHN6gLg{6C_mc;nJij`20=EVYA`$ z7^dXIY}rk(Y6bQP_E7V|maHZ=umAl8g_%`E7Wa+Is4%hT&qZ zaFiw|SgaV`m^x7h0*H5rY!q_`AcC#_vrx_@l4hvG^pgEs&YJl~A@(2z@Lg;vjPfHn z3H+@NVvw_j1-Q<6&!ds;OW+&xa9bLyh3{B&_dKWV5$BL5TRkg`712sfk>T6fJh(dI zfR;=#bSD8Ki9AEiQbTn}2p)U)%TOiZ{2dxE$#UIy$EM|7tji(w>K-t3F7VLf{T#SO zDcqdaJI9S?M6yQnZUP}%jc5Fo7dMmg89#^1?qo6Vt^QhAr(YDP zbBc7|{ibV7zGg(mHhP~$D0R>Re1W;-aj`tQ-pz(S2;clg1kzli^cal5IUU$#=b1T3 ze|R{UBqO{C@MGRctGPIuA?a>attO%erMS|a!GOlT`TX?$_lAjj zdEk<()gN(umUsOq*lS%w(Jb2D#Y$pNFgjq+FWHF*{acl%R3uD0 zBtt7sk`KF!8HgLU=^uMxNfNNHW~{Lw1(9rs?||>-3XMyeHjk2lXQ7U7+KUy%YMJ{! zW7M$~UU3u6kVR66l=sb_V7bv`GJXtH@BZ0Rrh9Sq87Io$9T^W0$`|n%K4eJrEH?sQ zEH*7`VVuX#v@q-y!rgcQUNm+&Qi*CDP0rz4`51LVJ#`ky*6OmJq+vROpk52|yR%?Fsd3r%nt2byh04`ks2TSTMT$G^ zvg!K;ItIp-)>qV>u4|X-3KowRsy^RylN8fZ;CZ5!L91j=aOe^OCEefUt?P7Jxdf$t z<32P|ZMbC4*6LnRKQvC8+GPZf4b)*J;Wca%TG5g;s3~Oi=KQ2g>?s3{oU<|4F}{Jz z)2b8?b{NL9;_p7Npq^$<0_KSQ;M%~oEknKH6tAM z?O`APlY61Hk}9fBU1w}mO6~ZEkChmvKlIIaXWBg^5^H)oZ`i)1e11Rlf@4K|L(#fv z=+HauLUSVGryj`}`&SG<;KsIfq7Zo7PQH2e`xn9^xjxR39Cs=qd1s*42|>$eSY~T- z;0p@ms*BW>CThb5e!B}SnQ22k@eg#E;tyzYmYQR=y>SiJf9)xuryxCz1`BC$2{Jpn zuGjw1d$9)VJR|Juq<20i^Q+L2803oAx|#$cU-<)mnMch(A}{)J8pRiF{A(tEwZ*2Pr4Ikkf44WN11`ci;cW8-#Sk9k}T*Xs}1;2(%MNXH~ zk&XuZFe%W&9)k~wqJk%cS}%XOkFlxyW!t-Jra^=Eu9MV$xr z%4x~XXBn_E+5-hUhHbo7MPCpUXMvxm%?O|J^P(d-m;ofN`ejF6%W{E|`CU%s#!pJ# z&kOgpashw}vWum$!1Z1I%Gqy32_MxXAmTjPTFUe<^Yse`G*Kaz-V?i-=u8~k;wSH3 zbi2d?!zs@LnHCFX&>X&n5=`X%l4l|u_<4-S^ZtaD>KRLz!9nm!l|Yog^EnbQ-n_FU z`6<0|hQSZBUI@o}F5nI^hK2s0Z!tx1Rji4ATpoLH(W!#=MjclqoY z`Q}ig(uKa+o+7$1vR8uhySF9Lv*yYr$K0?HuEKX(Bq@U!*DZ;B?8!J5n7*3D^`;kT zC-|%cv~p-fUE8C?n(O4W$X}#(G))YO;K38m1vdPaC#qCP<)yxXVtDtWoYhHULlb@Y zK{#%6g$hf!6a^)(sFM_$e?<(o7-EhDb08?w%D4!lo0t|<^s31tpG`uvuLR!cm8;IK z0S_R85-ud^uYAX@*92GW7n!`3iXN)CWMS}e7um$mEk{Vg1GFQ@Y;lpq$QN;0+pzjR z81V6P(^j;dW2w6*u#}TQ6?iLVH!76NdQ89!m@nCEMtCrroU?-^Wfbz@`~E8?1x985S(Zah 
zyEiwmme#{|QKg-zgRr|xWUcIthz?uhChQzNq=^GBJHbAY+hFxj+vY5W+&Z6`Tn=A2 zA7$O>FTsJAm)HYm^;`qY?!b!KU2=iZdUuoC3fb#YEqapPQmZ6;0m$KJ#jEkOpP0teD#HQ<3K7t%(%Yys^DL8)cc14E} zQ?BRrhDWM)CDP5LoB zbjwl1&s7ZKh|R4FHLI6~fPtffkzwY+7JV?|o@)4_e%tlp{LSNx_l@vltfZp@#r)?E z`ETL21P?jR^NBN6SFWSIx>O!dOYo4_kc1VZ~N8M38h@q zf~w<8#r<3lF~O3v@6F26@F8jyud(-=^QnCArIbz;L?pfWnnjI?vw zQo%l#D6y9S!5B8Ee?$SEt3GnTs}6tB?1GS_;ARVO>^Zwj+A`E^=dd88 zwj!>{Z1v6BL*&qN)pHPdJ6xxiW`&}jyGX9D9YT3o*mvzo`NcHY9A89l9P=W^tkq26 znz}tOL=8IgG>u)FeZtJDI4;P@etNJI+mN7_H6Xy0xCUG38!B+6q}(W;d8^xxfG{ba zM$6H8`Vyzb=Q2Fh)ywH7Cj(XEw0)+S#;5dO@RJ&r^fUYd(FnYsQ4YV+9zD?a$z~Zy zsQo@Lhzcy9ker#S`v@i=5B7qg;i^xkR^%eepy}~F+5F90bP=T5^tyv3;mv$g>~_RK zd_`3v>X8M!Y@ctXaqWqZC5nzEI({S*50K|H5Q#tus6(F*kfvuTqL<7jBNSF2GxxaC;R-AG zX{gL|irS%ym}AUIK{xBUh*pDwc18Z0a@wW9*zqkq@)XL;&HXwM@0_O*Mcq*lqs~@FseZ)TIE_ig}r4`$<#Dv`TsTN7jgDv=m* zg4RE1oJpG`58Uowb>g~b#i{L$P+;oUlw@tlK2um56osD?+{{K?TJo>oKa((iB~yVN z^RCNLIKKri{T}nNq9_l$j1KC6-onrQh+^r@Vt%^7K@tIXrwY(|tz(77w6tLFp_d3J#zWK2UB3ppGPCZs}-NHMU)>Ww&# zc!y?}C-yx?72h@;Q?kMAbsH!FGx#Nhn1N%Rd zog!wuiDEw5PY-f65iLJJO` zlYzByn{xfwc;8Cvd8Pd7Z6n2cJC4+CRIMV7T=aY#n7W=4NgTH^J2;$lFQo!1%eeCk z(^sAsIa2@Wolfq&FIHFSrW-2f(ZfR3(M{DAicdO5dEWYDs6vNpmiP1oEoPBUDcwCb zcsKR=7FW%s^6c(7M7M$3qCK0geQ6FI=K^@Dn1VW!vbOw)=N-``NwFvKH&iIUpJE?nl2+p7*~fZR zg#O(m<{`uRfC<7;c94e0bWN$m4s31-?p}P97T5)<7z0{=gVe>4u3Hp`a`$Lgj4JYH zhM@gq`))n2&Pn-DfDV=TV1ov!=kq1+RlnE<*J~YoB^>LFe#+f7Xyr3w5o{XuZZ6lo z#_dxk#Me1y(oc^%`JCbT?)R3U4KF1$hd-L7)=#gif1X^x4CD zy7|u4fm4FDtwVkuxp{B1^UZ=&B5|ZZg^(aU?f%=_EK+^*c&ndABNq=^Qoxsn${85S ziy?6$@~`M6IV>s%^+z$oXg!7rBUu;q!Xg1rkmhBto>4^kXGt4&NUHLe<#8E+liNFA z;@JZBLPIq3$%GE9$?r>dDyFjsSK9_sJwqkcr1rC`d5hBIZ zyd4a#3YLVWTW$vJN>SLz3Y%{PV{?KBL>~Jyk*6Y10X|OnJ@ zog2eY!N)j5a`*~svz%v24h&-kZ7;+Ee%WCs9vbGbIcoFlU)Q^+so@>GvK$F$9FEYE zD|e4lyh#*G{tzo}X%V-9V3104476`&4<5k^I4t+$Tv;1%O|y}6$b1c|QSHa&cILkad>)Pn>N)xZpBk=pbfNVO*8gls z4;Ehp*yHTSIITa#Klnl~yUQXPTf%_yyg=5p(xU>_RJrmS1#mhlOTlSWRQdJr!r&%^ z76vbP-u|@+-Y9pUCIVhgYop<(u0SyrSIRMmpikpmHXWus#ons~gNHxRLtXg?sLPvy 
zamWwPEl0iu*wF;+>+NTDqyuWW#dH`Kh%7O(-&r0oQNItx}asfewv!@ zZRT+i+>kV6TTMFL*R9~(j1CYqo=~+ThLrPW>)e5h@vR=uQBANd*W#vHU<(zX8~{C? zso;K~9qkX*8Z%1IU?HR!FXF|I0c;RT$lWP$Py+Xo3kh?+7iw4X8Y{3&3i6=bp7JAU zSSTuO(UxD9^f~_ofvxY)5x-^_U%CB3(l+NoIUtC)kxmTbr1(jG#F4v%e_gdVBKt`SA zwkEUCk4It3V4+V~Qa%l#80k*|05h598_cbmKWGgk!}K<)RKrrsLc4qKm6u1U++I8TwPb zQCHjbwP|ETsmsWsGz{|^OX3~;h;E4Wr=znaQWm^zY%^O%Bq6_VPz!34+{Kb8gL9=f zqUns5hKPU2_}%&;&1*J6Xt9?19P!1IirDQ`O|OFbA-Y0!N}Tc)Jun zzF^ZqDTb4U6Eo@|ne@=FmoO*&q!-?3P9|FpA3u*i*ChE??X5mvL$p^5XJG8a?RI~6 z+1%rgbRo$cSGZ>Zi5joOU=01L}$@`0~I!yU;^Ag)i$w)={ zCW$=E{BqF>uhaiS3$e>MXH4L0SdL=-nfaC6q5yKV!1F@DJt9#ynD5J~1U4~QN(wV1 zCoa*?Fm6R*c?+X8_yw}q>@9S1s~Bn-TI3@M-wa0tGbesZ1so3VV0CkxDkzLSo!A|3 zSB&AZuac&%OB48hk{ykm*FaVo19f~73gF+TjdtU2eZTKL`Q*|PBYOt8L`#@+*f zK&_(aUGyEEqHkwfzUmej*BiY&GVVDIfn4+5bzC|+xl8~|!cYD89v`@8V%T|@vQ}Ws z2W+vz8<+J*ju9dCw#l6tSIX!YWLcp<=>n`nG*^;Sgch2_ElXnw?vsE=fmc!fvLu8Z zQizHxOWPV}@W3e)~(8DXGRV zWVieyHtvo1UgCOV>WUV8(4;%%Xdo)IW5AIhDg<0N=|u&uEj@x##(e6Z3K=Fz8}ZOY zOvgNW9FVSK&WIVQXlF1{_~cW65;$OY(?<-%h%VffyX#giM?$fb6J47O=res8$`6$o za0fW)Evx5D_C@&z6BZ^zO9I_XW%Bc*fmMw2cx_FJ^aD6JBV8mPGES-LC__1JX!$?Z zAh<_UJOl>bGnc4@^DfrjmF7}X6EHyqR-bj7AJ}n??jtXC6^Prm9VB&_zm$U= zFXP!C3k%_pGz@5c=Vj3!!wo|R{HlNUDdwJvm*TRc74D0#^|g4EKZ;u8W=0BpK!g+h z&W&ES0YBxCPptGvsCSb~TF8H8H>(AW8v=gO4|K0lG4U`KpCJ?xnw>GPg~R8BgGKkTlS&0S|CR!j0(RBiB1J_+gsuOm1 zCS)SL??Yow0E}@5`tu}f=yP>Zqnip9*pir+<_axZl|R**rj$!J?Lk^Um=R%sX-8%^ z=}@x0u(%)-`;@tkOmjjK!S#8z7F$t|sv*-yCI>V--ZIQ{oC}g{v3Oyii@pQI0ep&f zjqVI+MaWxB5Y6t^mCKzMSAFM-+z%43?2R8p@~eMzu@kTfFN^~)u8X|XHzsF!iT^e^F>f+HfyTPovWrTKkt3BWYKN!gVO{kdb&Pk4$70@K@EA$_DyCLk9im0G+b h3jN literal 0 HcmV?d00001 diff --git a/src/umi_external.rs b/src/umi_external.rs index 9b880ca..0bd549b 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -10,14 +10,14 @@ pub struct OptsExternal { #[clap( short = 'c', long = "correct_numbers", - help = "Ensure read numbers 1 and 2 in sequence header of output files. 
+ help = "Read numbers will be altered to ensure the canonical read numbers 1 and 2 in output file sequence headers. \n " )] edit_nr: bool, #[clap( short = 'z', long = "gzip", - help = "Compress output files with gzip. By default turned off to encourage use of external compression (see Readme). + help = "Compress output files. By default, turned off in favour of external compression. \n " )] gzip: bool, From a11c3d4bafdb87fc0de166df4f7a495046c3a9c4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 2 Jun 2023 20:47:17 +0200 Subject: [PATCH 28/72] Github unfortunately doesn't render simple style commands in the Readme. --- README.md | 14 +++--- docs/img/logoheader.svg | 106 ++++++++++++++++++++++++++++++++++++++++ docs/img/ngi_dark.png | Bin 19261 -> 0 bytes docs/img/scilifelab.png | Bin 14568 -> 0 bytes 4 files changed, 113 insertions(+), 7 deletions(-) create mode 100644 docs/img/logoheader.svg delete mode 100644 docs/img/ngi_dark.png delete mode 100644 docs/img/scilifelab.png diff --git a/README.md b/README.md index 8f580dd..74e1133 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,13 @@ -
- The SciLifeLab logo - The logo of the National Genomics Infrastructure -
+ +![umi-transfer](docs/img/logoheader.svg) + +# umi-transfer

-

umi-transfer

A command line tool for transferring Unique Molecular Identifiers (UMIs) provided as separate FastQ file to the header of records in paired FastQ files.

-
+ +
- [Background on Unique Molecular Identifiers](#background) - [Installing `umi-transfer`](#installation) @@ -15,7 +15,7 @@ - [Improving performance with external multi-threaded compression](#high-performance-guide) - [Contributing bugfixes and new features](#contribution-guide-for-developers) -
+
## Background diff --git a/docs/img/logoheader.svg b/docs/img/logoheader.svg new file mode 100644 index 0000000..8adc96b --- /dev/null +++ b/docs/img/logoheader.svg @@ -0,0 +1,106 @@ + + + + + + + + + @keyframes gradient { + 0% { + background-position: 0% 50%; + } + 50% { + background-position: 100% 50%; + } + 100% { + background-position: 0% 50%; + } + } + + .container { + background: linear-gradient(-45deg, #045c64, #045c64, #491f53, #491f53); + background-size: 400% 400%; + animation: gradient 15s ease infinite; + + width: 100%; + height: 70px; + + display: flex; + justify-content: center; + align-items: center; + color: white; + + font-family: Lato, "Segoe UI", Roboto, sans-serif; + } + + @media (prefers-reduced-motion) { + .container { + animation: none; + } + + } + + + + + + + + + + + + + + diff --git a/docs/img/ngi_dark.png b/docs/img/ngi_dark.png deleted file mode 100644 index 87ada3373a1458eb181754933820104cbc5c9c20..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19261 zcmaI618`BEvHYc`iJDE&u+qUhA?PM~sZ6|N|#34SOj`ZYbk$Ul=QVM#V>B{#Fg9cKv~&E21_9ys;A}?1!N|_YOeP3NLPEmtY--N?Q(W?Y z#Q%*1$Shr59eJ6UJUl!YJ=ho>oGqAGczAf2m|2-vSsDHz7+k#UU5z{$>|MzJTgm^@ zBW~tm;%w#UYUN;0@*llM#tv?-0%Ta{}d6k^4%>Hrx4N=VnuyKeaM1pViZd?MEC$8IjGg!C?apiI-42*uJaYQ zsUJ$I7eePc2<}@=0?3O1c;0x1yv0mzzLwVr! zv83_cZ& zk=mu=2kOBY9IB=#bfhdb1rWh`Nox?#uQ^}L+D@)pG)jES>w9me$)&sU9xiBif7Y24@rKx;a zprW7>9RsqtnCS50^sy2~`A}-f^~ArUrcJ!~RVrHq4TbI`o&h{+ph{f%hvGS&a31Ro z`%a^`>qMPPI>ev%>4kf4>X9vCE#O973Xp}Ep}!39OmNmR+SYrUoE3}|)OmHixiew2 z;vng&d-4h7=e5C;wxF?@Sp3O@P)yoo3utN-_Gu1&1Trk-Pbl4}Ha;c4a$b6Q{RGQZ zx0ecG{M6fK*k-q%%d^-W2y^dHZtq*)|JJ2U6QRe7F(V^b#=yqLR@2bX(RWZaYZ0L4 z2%mEvIgZoW67HKAqpi#NvwinUNN1m7hPFhyeOzzc%%Q%5?jOG3_Q^N>Jl$Fq8M^;l zj&U3|Vi20`MqoT9Ptt? zNLW%uWsY_t5_Y4%uGM~XZT0$j8LU(7vHei=TUS3Z*9==dU85Fp)m_tzG$q_&DJx!) 
z9jmVRwhenp&Aa0sI#|(;{i33(a^3kp=@mDTBb~RDLr=53r)NtnTjfSe%6kJdkU^*A z0(KLb?8=D#OYCdep2!ViVsf%`ws$I4r8I`>70GYx-jhEJ5)#rTN@|n;)dQg2n_b@1 zVV+~x`5;@^X$34p+CeSZz?*32=EOt`X$=)a==6R(3#l+vT8!(RKfd=<)zQH(&Ldb1 za(U0Rju5mW;6G5K7*$}N9!ny>5AHWypO~27fC;j+omW$~yTuu1_I@}a`#l7%cWdK` z1-vSfNv2Fiw_p53!0XZ4OjT1;Q&&(>(EU>K`vdq}RmJ(Zu&l1>=9a1sA*8DlpYMA{ zn*%d|nh<8wt>2sJe{8Y@(|A84^eviU1sm!WadOG;!G_XE@-*7}zInX{7jCu9M7(D( zEKDzfQlmi-Tz(?9R{et^Bq?5<=n?XFPkP4}Byck^mMdkvi8z{jsXW~@`VohkhLoam56HJ9>;@5qPJk`yYC@? zHvm@kBi-G?#NBm(YCP%HiOEk#{6%EvOVm?DMrM3}GM#Bgc^zhEt0X|dPl>L^IR={v ziB|p%X2K7~S8BMNQQOqg`S_tU!jo+GTbt;6vT!xZyX*%*A&8~Zhp2P|8lTJFwwU|K zT($aywG%vwe6KkI){Srf{dLJV$Em^mG^^ni zMUHc3W~K*tBRdn*H9Z9xN}0e%ien%6{+RakJ=CGYp;zLQVBsd0>0JeZTc;;&mUBhF zeCbChh_?2;wvL8Ixwf^{FY_fO@4lRgy=KmY?fAu(=OJr0uLf16F3!pQ1yi2Kxq{KZ z@vHJw&In(S*t(7;?(R=NjOqIKnQzYFRfc|uN&AqJlMkXuWF2p~%uIopLxifI0!uQR z${b;(wL#BuQ15v`g%7-6om0+(EpQ-4?vjo-UbiOlN^*Z~l;XeM?gl4U&crYOe4!kT z&P86KfEnfavwoPl>UCLhjFhHWj1n)myP}e(>1z*grVM#*$LJ0$N2^{&`f%98g>}(G zc1cOJ?_X&x8vYi4`-HJ^ND+0I*hHE(xx2r=Mn~}Kud1rL*x1+r1`+>l((m?e=TC&* z(yN%!#1yc3&(P|t?;2cpJxkkte1(Rzt_D{t^Hj8^L?<*=yq4q{w9IG%li@ijf1@C< z7)Wsdr31^4-HxbVi4i0g`GEb1@S3LhkjmS>?*gs4yfxT;CG;}!l2lRLrzyX^6kR(V zXJM0t>5sGl9Kc7qQ_XGj!T3vmKSNImF4S|S1+RG$e=$b~vIlXwgyT&8^k5E}KxB;| zbk{t!$oF$62uU8_8TQn+cY{8A_9SyxlmwWF9o4+LrG>Po0!w@|+K>hGRj%VwYPDL; zNu6L^r~$5gY^xf{R7#xS+3L37yP45qfItWcUF33Nfw=*?n`dz`V%aBGtnD&n1aU%$ z7HMJtJ!_XjHm9l@5&HDnKcjzXOqznt9#tAJ6GkG8a$dP=wc3%~d5!nh<(_doKCXo5 zH$L*m7EsBzScS&}Jf=0hd`o)lzoomL+Gl5b1{e-S)0#uaMr8vlzWe&HOcePyi=`4w zebT}*f!RG<;$M6_ZmEXJSLXBHB(2OTu`ce~kzI?L6Xm$xq+YeX|NeKVVYV8U(giN%{=ww%rhUgDBrg~Z!Q=zNtOw$E{@d|y6+Z9=xY2+?QVx0dLSL7GNvlTW+f zrzQY`a?~Cax+BDSCE41Ln;3174(+mFk(!+ynk_`%dP*l@FouJ74 z>2keQAD)v%AOt%-jg(=6L>65Ze0-z_PvIHvQ1J)9o(+y~JO9OVYM-IvcsSl5DQw4b$Mt6nE_7ld6Z=jO7wqYf;BIs1{Hs+uOd0~CY)BgJVSU^glT zaG?n!2_KP$rL!sF_^^QEzK^(7bJht*7^`jf>#fbH-cY9$TyFh74Sg>&%0??TB4vG> zn&J3ns1K7<5^pkacS{M0utYRqMM#Bl^klmg!(pcv(gx;(M0Js6&0;O}GThH!9Jqh8 
zwDG=p>N5@}pR|*zDSQ&WpCtyX=+nHU3K%9y66bo*kUZ8PCBceDrL=#Af*%_-eU1Q? zgg$ArEIHb8S=}#I&WZeAcHx@E`QaT<;S71T-$-$kR3EmLp-d?#ar?(N1^iZP32Ltc zE$O}MdwagZ&Kjx{c+$w(&Q)N=BMCWM9EzU;LyZd!!liGeH(YDQ)M`)6MjzwDs6iAf z?AF8=oD8|in+W;btEd#N3|YX+7N4X3n1$*a&8JxoLU@o2+|u-bUj;~HZ;#<@j!f^{ zfI}ffl9-y4s-|DW!WHZ*9&d-^N4Wa%5bA7Hc)>YT1bBt81W^doAe^$!xVr8*J1 zF5>k)M9?Ort;T&TvI;>Zx3Z^THOBFo;0s7^j*R~LmESbBTxDv6=26~5KGqAj>j}D> zA@2$CxD7AQZ{<%oy6UPPIB5aV81ct zvTv_1)`g{+XT4bL7^kMLVWM^ydR|#A?WZuPnjR^2URgz51;%9MF%aF%*NjsOKeHF` zu}@qo8^VnE8}1KGSRnPJD0jSJ3r+Y=&FOvGg+aziD^y?riUD|fh$DaaY6HYVg}m;nj*O^Zj`le&MtAYxtgbrrZl`gn(MIkg|P zjW%|}ub>-CsTz20XzJvtdvH-!RA437OMW)}ekr&jcTUD@{+p>AD&?N?{Fa8GF2{P` z>eA_m1r(=0-8Z``9ej9t+&3LWGXcR;6IjpYI4?XZvmb7_@??G-T4njw^U3!YjAXze z@S13-@uWN@d_;d8us)=9nAMT5CQR}{S$0jEsErtW?tT*=YG zq}EHBK(!24JMMJ0bcDj0K&YQA#}7m|AgX4hGb?ORBpeDJC#mYez1(gB+7krhBEoF- znt*5+m4C^34N;*l7pa@4pYQmX=${JHhDxp@q91;UmxVkn($egr#G}qxzUWo+qzqi# zdb<0gg6;E&+*W)jWX%3f9J^a-DYhFVu~L?W>myNVDm_)w&I>DmH^?Lq@y!%7#)5rg zi(SN;x5wi5t1jHvPYFJivNC(Xk=8uX%tdT)ViRP?-PDo)^})E387-p1YeAU#j6#h|)|HFtTXqKn@-xEbSGZ_BW( zxK;Uj>d3b>4zi3EW?nQy7;}S>B&c3=C25U9A96Ws{!@$iMwVA+H|`1ZR-F}T_er@u zTO#v1gttgPUA(&cz3_6*mukz>1_8GEm?ZscjiWhF3a3OwUQqk!CyW=yYw?_DD)W-l zkkEoL8WgOkXNteL`4TL)fv?l9NrHg7tp8sjn23yX$vZ0M>@Hb-4l1_rwCCf7n{>Xxaf*Io<4vO*HD10MV${SG*QL9OR*UK2T z1dd+nfH|+3JdhURXSDGkOMn(_I!<&gpz~RNs3@Bz3%?ON$WhG_Dox-e>c z4fRo~`Cf=8kvaW3FrUPv4VF#O&We23e2osZ!n4W7{8B!f_Bk0+JoV%+{J`1tw3|&BAh`GQ(!osr^{jHvaL4VJqT=*8^9%*^h~UbRUB3^t zxPD}xCCU%o*N1+w4hSn+vo8LtSoEL@do!*RnvZ$}Wz1}dz_~zl;;d91Q?)dQSCc*q z15uWT3`A9Lp7N-tUYn5y|7GOU_MJa@Y0y|Id{*}F81x7Z6EIX6x^7h^LeS(*5~0gC zC1VoAaDg+SUh0KyO$8qB7IWX_fA+1Z-G@A(n}IFVe|Z3LB5)Pg zpvc2oG*;k)98x+2Ch4T8q0F5!+r=Qwmm6`;6d5uDq?;Q~=hC|h{^V3ZYeQq8DD>>; z&wT74e8k!I*lcs?sgx|4_o9=ha4dLS?Afo)Lq1}IzOwbQb?tpm9it2#lRW%YcdbG(>~pa(;;hv4>N>_Y4HPG11xD~ z2_vr0GNuC~KxtyUOZm0p0kU+>(G6UlZQQp4bU6Vfl+Ejj z^6N2+!zP81N}{93^Urw?t*)e_*$0P80t?bVj%YL zhyZa<32_hp=KkYh_G*dU6PT=vu9cNUosO24w)QJa?quoCgIW)9&y}p3aA2;Fc&*-; 
zvmnPL%!~6@`hZz2ws1>xOplkQRB$Nd$c%MF)F+Yn2W_3l~II4%RF?rFfvwL=j zxMVWfIT-M1Tt6nkg?z~)RaevL@zfE4i%+2LDH|s zpw}Op>?T5a)Ebi(;YBGSbdb&gXMVFv2#tf{Yg^E7Rp@TLsWVF~6I?p8KbQFG)$=SS z8V*rWV1C+S!+F*MPN!>`Rn;I~=HXOTN#L=;9E+M7*#U!#G}KEw_OC2*@F3MX)`Z9P{ZwxMBsXC zT&QSzp;8X#hjrglycY(IZ2iNvG%l-m&}*&{kcf^L@ay z-S*o0QdQC$0?HFlp$n?RV9n^>-h$PR)n_=`I^A>CyC&kU*7>i$+eW;e_!B}} zTwZYSfbUu^2!m|D6c<`a!9sWq;U{6xX6#W9%A11NU@t`D*^f`PyH=3OO4d=4*51LJ zYY=kC@=UPb&~EXE)8RidGp*ShQ?a&3hQth6J3Ow5QenfItSzyAs4vwk@zS zrF{wf^rhFjo66B2>nz6`k>U2eN4!I9!ZRi3zdq@~iXJ1*O2lgSyWz#GRV+Kp)U|ye z8mj?-w#NFRzD`eSXinlP;D2(igeZ`ij;C>M4(ALta;`5#~J*UM^<5)?41 zD`0}>W8?`JV)^Q-QgAF2ryxI~h3dPynwUiON3|3T%F@eg@iyG5CnBI8`%I1GyjqeA z8ZYH*=vQ4yeCYgSzW+2iW{N;1Qm9d6rw^ON&wcoD`A|pVTNk$)J45f($6PrcY91HL}U;4ew3bxf^ zLw6ydEb?t;s;?ZxNuU`H^?39_S_p_19D)A&U&Lyf)7kW86Z|3a=YRP0QMvfjq|8r9wVnR zn=`80aHTFz^?40zEzp>%t8CO4#T5ZeMU5yO_^4UmXiLpGPAQlfBP#rNf|r?Eno$fq zgVGFU7nW-nTYj^frT_k->9!h`2fC8FSysy61u0h2Yz{PEAOtMDn(NhHch&N~uX_^G zg*1|%*-IiFUo_54htpiXUv%U8IX?Oe5C$zwh|tU(nC{qAe64i0Ca9?9Xtt)zYiIDkRhMi3~&!2#xY$2o>zdRr&h zBFobHB(lT;^sNl1Yd%?{GoBXkPIU&p+=t~C0bI8pwZ&qe$2)R}uOE$Ew8DJWVtlsY zL4~Qf`S(mc4ODTDrdW4g0$a_wYexZ6L<4_^&5MX_KTLg}H$5_auG%daV$M@g9NSsT zJ+8MnU_F63Sl1nPhV`h$oAunSt&L`j%=T_WQ*EHv`&hTNrxsKr21l39@-?Y4p%U2( zI21s%)N(0**%i#h1i`mIOfbIVO(UcYJVy?{s%t8WU#i2-qpNMOU7Oa(Npg0x&d&#w z&DCNu%9ev4cVG`R>$4gd(o~zTgR^H>2FpFGE8+|^iz2b2iPEJ{)!1QMWeX@B(_j>` z+RNtIYf1C)EL-)L9Y9EjNLG_QgRBs!R^wQIdocpJrfg_xI;5A~PzO;M5l)G=ZZNiJ z4hz|LemdQ8uM#sM&P6`_WfNdn4Tt!PnzO!?@M=Pwg=Nd;Zuui+$^CEaO}TzfsJew> z+!{$Y?sSHlzWHzSCb>l!VLxEuq5F-ncm1yy3zqs{**0k;1b8GEjT-P`kZ1_`2btX0 zJgQP%k>&7g84Pv8Db_9d>@jJ{Rs zgTL7No`rL}%#K$$t&s+-^@}~W$U^p&d%uW6nb8D2t~P3z<1WU*c-0t_XJP=JTA?;U z)!zh=+S_)P7Mu49Qgq146O(mzN=yql(2ffz$1{vov8!}xOX=VB^b5sRnCNm605u!O zSMah?aR!J~YV;^jPnfP|O`zScY8qEMjpYZowvxxB*@;6V&b$wdo?%fX4cr>Yqt?DN z$oj|$)PV2~2lUt+sNWvMU-EVMUK&Ppwd&dPrjpXXYmc{B{9fCc4~PH6=7kGyJQ*r4 zHF|juK1yA4qEmhe$R5$3&80Q96j=TC!tu#(lKp7ZTEU{Q`l9VrNe*Ny!|`IBR4k0m 
zo75W0w&AK$WBlRI;duFrX=Zf6i_m<-qYEc(O8ZXz?(OFnREFxcQN|+{UMGXsb7GAR zPx?S2Amg&##EWXK&m)$&q+hh4IRWg>Xob} z<)4-bwTQL-@P;H$FTtCxAgA{QLVQEK_Ix|2LYBdODFUYG$C3Y%-1=mE}Azk6!~w+fie*dC|?;bylV?J@lEAeL(L0%Quw(+JWv@Sr~B*^ zRWbvo@$)@^1B*exvT9k1FQMZZ-bd%bCK8)Fn-38k2tE?U#gXxOz~v)`fKR zW#mOVXG0P`nCq95lj%>=&8qM?!7M+=_S`pU%TuYl9j?|mPgK4 zgCg!WVr;Ib#%4nA*WGSkOJ$tEr@oiI`g1IPQ=PUEyA&X5i-TMB%b#_Ij=3vo7@g1` z?vjUY7#SuI&cNUSoG2P&?ihbNQY*1R4Pgzgdq?P^gloieCj;LA}U;{kHZRk!&dV~<@r!M2;g$$r@26q z5^+FXFJ5ax{Up?94MY5!_P(c<4<`tPFXt_ zmQlOax=%zv%v;q&cSG~qUOZ4EEWTQ$0<357RD_b>^6nBOeI+m;C_j10)aMR?_k zTu!P`j_vET*k%@*%r~Ie9mMWX1jl`U1HkrHOenROV*zL`gB)z+Y?E*hg<2$&&S358 zIh#}hNC|`+LC`P4lJTjjyGF~C6L5oQah>xhAy!k4}&czO? zToytyTg4|8C=c|d%Eo0_&Qsr#EjV^Y)?2qUy(QsSISdRnqrMCX9?AR>)H5TtL6oqeHY7|5v(mBOu`lA zGR=j^{dRaP?|Z*LuZ4c#@*+9Oa856T@$8SpB`CW}iD9YC*m~$B1<5!pucVe_65?dM zx?xE7-p zFm_*qwlgplH(I6ib2f8aAM*OR|DFD07!Zlxzw6}q+nKouGND^Wl_T68F7djB#-?x7 zE5|X0!ooGE#8BBAic??)r0MS30xdDr@dBTgAreBM8{C%jZ?`{_*LJHl*tMpF(fJGK zqk8v~h5Lw+bQL^wHT07K#(Lv1l>P+us^-c*zg}}7XWa9rTKDA5K|-LBYqM$Ee2^dP zr3(pL5TJ@UYnng?$7`g2ql}nYB8eTpQvyGQw0cooIK`Y3GG!D@uWcBA1lKu&#I@>3U`LFVktQgetH^_;m+pQWcD@A z54sG?eAC_+PwoJLa6awm?Im5lp2ZPMPCvqB1W}jO0C#-<@>}#7Z`Vd^B>~Evc;3FS z+S^c#QAL?zUKdTuCe9%Y*VceO&*t6D(zdqkEOCgizGpCkPZc%v)BD<$A-D7N(jV)o(KBbZg?>%rCpqI`3{YFeS?ntK<)U^M$RNV++Bpn zD>))tq7{gLA0FUJefoOS7j%SXH@OkPzXuFdZKL`JTEM$0koQ6-+^`&GeSw!tHXR^( z+Y1=RubNk;{#9M;QJMQ;FDbnu>>0>00uRVIyKkmnAuds%3EA%}j}B{*SDe!^FIr>s z?OQo!_Q@01tJLZWDEP=flP4EyMr-Al^7s8919ufT@wHFfX{5s!-Y^4RJ%BID6TW1O zYyHaA%qc3A$dE*;HhCL*+BRaA*I0607`$+gh{uKd;DB8p$haAGkII>I0m%oM-uHzT;iJSQc<=Q@al9D*- z11gb7b~BP0#++-)Ge)u*Y#V*1|L=1o^qg!?$AI~XKHONUg6+|IEz#KXT=6n z6lG18B$>{H(Kh0*jmxX&v_n`^Bl!I?*R@0tpQf+iIWFKklcwjTj7{UwKLeN5|k5kxwwVO5OVjRO@TOcwF5Hy2UP9kPc|)M>gF zcGW9Ok^BuzSI`wngiti%>Oj8~u!MZ@>wL3PqZkb02CGK0Mkh)~f9pahz zY1j*@=+JC8Y@n>6IvBC%8$n>AB>?tnx1As{fOTODLCHQFO}KGrC?>X5T}N)@w?onn zq6Aljli}_9mKy_`UJ>z!DD&SvXy<9Cdug8$8h6fAaZJo3%nRjRFV#ja?;5N>DsjX< 
zUw7FxjFv;Fj$1~x>6`6*&~nEP{#t;Fne=jeX7I=<=Bn{wGv+d!n(#b1hZ^Q=|oE=CC!(TTOVy@m9u*7Ru*9#pF>69+8oIb zJ_mN#SUi(LN}qR(JN=;}A&t5kU_3_4o8&}}?3#egh`l&;Mju6qpmFtMxPG5EQZ;8# zP)D5eF)K5Uzj@$6Cg|?YElP$hGSsQKJz8%C!m^&@mX`%f{EQo(*9dL^FZFp1Um1g5 zN~#9?M@=SOgr38?gMQdNCcR%Ju#>dQd@fRRrzGsJe6;MVLe8Xa1uM){H}hu&%%re< z>SB&oib>u{i61k9P~sia<0(*Th>g!^NNNA0Fva;=^~%}OVt#HTAEYtiq>58(Y_wmZ ziOb2HeM&wn2w*E&r|q=H)-y16=~8cb;IUEE#>Vcm8?oW+Zj_okpoRKZL7LBKjeS4Z zpud=-zz|z(`!se~*&SX=?09_5SVIfqk76T@8xD<%Hu`bAP4dA~aU4#uTU{9s?CuG% zb~+|36tahLUcnT!>t)^5s!l$RNSN+s)efpadA7AF*rk{r)n|=Q)F)S)EJd%0YczP?!F`dmb@4cQlHoF4;N)=X>Ga<421TaqJI zHER-euGexp(?(mQ{=FT4i$>D*n4_yY`M;{FVv~7mQgvm1A+c9Uw9d< z*jCP+^^Ix>-MkAgzULhWhj!h!ZTdim{FaMxcRe57HnYnKD3 z_++h|TCzL_yqkt#BWH|`h=k}Fgyox!vl=TW&DFJv{0R z%oj)sx8W7}TD?b{3=+d0%`YQ##|JX!h<>=8y8(J*t*&3`qRu6?9IWwjc89tU6#44G24mqVGr7?5$QwZ zxf8@xa5$$4mdtW%4~pG=U!UVN1FC+#mc7dP%Ew`DYbE7(Cjt#belCNwT6g86Aeo~FTMq{<)5$iI0U_ zsD<>!Syn@e=#MAcfE=0a{9$nK8B$nf96H!M<0J5eInm~&d988v z;duD2R*Iisa#}B>p33NHPeZr*U?o}NUt_rE^lgfBL>`<)`M4FgmbtuW)=A;yX##1`8#5HEzvm@&^fzBA{jmV$f!H;&c(E?RbJ zWnPDl^Lb#Z`ZMRDI-O4d?j8qC{}8iVt>3RuW-{Y#f| zWNzF;!|84!R{i>zLtrZH9N9vBIepW|wIBQT>2^M{nZv|$#e09vjaqGT_FPG15Q1ypCjv@$Efqv&EJ3Pial<1Ld6CXEd4G? 
zRPI>ZiE_hWkVojoJo^!=c^{>EwTfqmR%xlETl0PxoJ_>-hAdgDP^@3GFLIJ9_ePbL z8)ujn8WhP-tXCxfO|my_s3Jb6eC1c|IOwaIIqHKnQjRs=s?(A6j4t(c%@?Ko=+OxE z${7V7X2lHD2o)e9j*@9gXBh-kv&2Rs7r>m-r{$2mpGCmt; zEe|)^6To;)7&8=*nssNpuuljAB>Xy4U!1I>IR1P&7xaGr@Kl{nYMYs)9x|bnX%uE=r zVj<|32Anek?fo>rdcF4d9X$apcS2f*y;K{`X0^p0nQL%|SQi`|n!<|8)zZKA`EDV1 z>B4-x1?Q=cuFspzRAzW&IAp#u88A05>?SE&J>Ag?r+I9-adR?K>483vALQrYs6BQ& zYZ_K#MK5PyJYr}|YVzThFTF>~4FHVZeWgWJvpJ+#I?T@LoK$gG%iIf}B6Q4%#9{u4 zE}F(VVN39*r?Mxo z0+nwuO0kg(hKNgV-Y40c0~G2&!UqNjfo|Rf$eg;j@2YBL6ptI%zZ8CL4D^mX;*a-zywY0HU5hE$|LN2)+zhfTT*JQ` zioUTOib869;dokb4rk~Ct}{vF#1~myJ>XW-8YX?c+Z(=(4C zsw@bN;6OynS6Hg0wlQ13Y+uYtuk%Q7qm2(T-$#IO$HokxUdxsGq8(x*9xxb*kVMXZ zD&Hm$s2%F)yWaKSw6%bz=pP4CW~5#sF00?C`c+PMnI35lb0pmi+mb&Ui8w>L*_8^R zMS@{%;tmNN@xW_4?)yoQc1q3!k1ikJgZ5*Sj}*Qp&F-)W{%A>j!LqV;{}X<@w7&g% zTNJGM8A>5vRANWu&6I!pYj&h7cgbA^p)=$Vtwv0(Xy@iz`3CWixOCqvekceBDtuOxa`W?aOYYz8F@#1vX zNyO#q$WW#G)wvp975{uryHPStAu6Yyg zwpwINP>`FFX%O&;{kM!S~H_dIkT zqFKSwUQIh;qQ(|!1EJvZ`OsvH;ww`b9i>ua0o+=FSs}501Fl?R5$Pv10u$Pr=Z4NQ zgQ~N^j0;d(1ENoHDXEcb@BVSnWqIzDo=vu~hM#N_P9QkQp0j68R3R1oGzQ_hzcv+l zm4UFVjA|l*BrOp6Xm@v+1@e;#R+xk0=HB72Xmw<0A)CGincBYzw~F9+-1MQVE?$^p z6L+3W>I2K@aMiaju$`t5y58Y6L85CZTIg0Ew z{l-H(g15f zb3*J*1+ax0D!qp_qt;xo0sHBUP(SVCD8)yOwAv@H2eHO={S*f#PV=7op{~`h46bPf zs8n?0cf%f*FwI4`W?up<&?u`kF7!~AT9QEju{-qs-p3`KCX zx88MdpO~23Z`rCuQJqd|$r@~JgSs77CrlwN5xmc)JTj{??7VAxf5dQ1GM?k@=f+4( ziH)=gjcp07e9JxK+f6C{R&Uv^X}QvF(_!X&PRu%s_81{tSFoThV2)@e;SSt~@E03u zcp*SuUVqp2>o$j`+ncHPMLJJd=w^Y>9LF26NL*RuV3*}c!9uyr>_!mWaqwM(ffHx) zr>MQ@JOri|7;Zi^;U#^sv$d#5Y9B~Jh{u*xjJm&Vs&EFIKMf90n{oaf=8`U3YzB(N z$-_$FQ#`^^5V)jp<^?GFcg`e4(rxKboFtG3*Yg>9WUvJJwNb zChrhuylEIt;{+|GU0<@LyB?Z8D%)PKJK(OlfEwt3L@z-_Z0UBa7l?G${`7i#VOoOH zV7-qk?Yfc+>pNv@03d>`$B*mUu_ zkUv^BBoHkI4^j1cQOLhPl!YPeO-JuFXddWs@Y~fTY-4ZI8+N1NMeTYkg`;n-Rx6`&QHL z!^eXoZ_fPZTZ9t|;8Byu>rg#BQvf2>60xk+^)pb6shUn-UGZ(aVvl=qzGxo=P$I_d zm#S|dWDr-2j3*z2IxP2m$4)y_fD;}f!(`lFT^n&O8(tP`J1;mN57Xw!9+SA7;2pl# zNa 
zsxy@#=PfrY4c@859U77_8IdErCdWfx<|EViU6DSb7{p}i++Oqh>(m5oyS`<9S%F>O zE~+=oVJWAi)O~V8w5Hb%6c;&3l##lZWb!wIz5!;KT#Fu~+5r_riD8C{_1}pHH59rm zjKrY>>o!s1{3XxKdD+9cq!juR zf(Z=LpseQ)5`6~a+k_v2Aon$qyd3zKqg74P&>G-S56m&M+}nL_MM*sLwnA#9Nz$KU zm^z#r;zpTTN<^cp!8@qhQ+<(|_8acqY1_~Y5qIsP~n_4mFYgW{_ujG$cvkA8OZ zDPcb@u>~+JU+nq#GD9ofR>b=|G>Bf1jLo3M&BE?k^ISt_le|Q%MZkz~7Yw?GQDPox zo}7Ts-_Hq7JA{Z$PCHCK-lrh>pIW+GovwQihs*EXL33b*A-&y~Q$sV&dY@^gbWUM8 z{rQS~5h3eV|3?573hMRRD4^{-2&)XcOept;(Na>Ho0AGtmmmB!!;Jt({f>s#9s6Qd z3dcr{{x@SB=#`vT`AtO2vyq(*HUs!hFV5ZRYE;QiM8KCqQBjL?KQC`gVUxsP4 z=dM}gLs1H&>i_@<|4BqaR380wu`3{M^=5qmk(I*-?W=9aI~qh zcGr^bTB^JUCM#z4IPgD-&5X(L#HXPBU0NIJM^LvI2J!P#&22<@k(Vc_TZF-R^)Ef#G@i^;vVm=y-w>A-C2pvR+RnG( zg#CO_*D}t~Ia0zbNas=wAOzE4;9L;|;V>uZs?BxFmMt>^!=N@2oMgU9(+q;sqSX+e za13IPr|y4|w;RoT@7Cw~;G7sxG z&q#!0ez>Cgki0H=-f(p|aNSWoD{h;)e4lV#jTY;t0`|y=~(~gGa~&O_YDK{u;Y9QkWS7yq-Wv^tm5>F6eE9?{#eT2yj{e7r3R_Xi%SZ zAj-ElV(fxNoli&g{_^CRE0+6$!PEH_+=IsrwLb+%$ATIAY#!*O3xNRD*-rs+O;!yd+lxjW1?VJ{?o*QIZBJoiP>1iCN?B zJv-{qh01LTh040FRRI}}_x zxqSlVxaEl_W_jhi&g}!UqRvN*179uDwb*@(nQ0dNE~8#u+c**bP}=#}eE|xXv-okH z6&u!XU7IX@b7oHd^OfOEuW^0-4Dvr9-NnPYXXb`>*Bsk^GY6k30)Z#@j2bQpwqv_m zbzwGrO?&zktPz|?g{aF21UpoFv2k!92fB|7j&$7?R?z%|_A-5EQ1(d)I(@kU{{aEu z$WHupo+7}d@KF56T&n`&7No*(j3s5PuAGWdwZFWKn5Olnvjn87l0~||T$dDCi>PmA z@hBJ(RT{)Fj1uJByFttO_%Sy7vm|weKrsm1jta zD^NBod-8FuDTeTL5{21>@zkf&){udFts27{|Ff+#yj|0rV?jI?tN19x)3Yd`zq zC%tAb;?t7tgK=|>BQ#}U!2Ik%@~i+WXljRUT?ViHX4$E=^$~qm^lHAE$~`aZYpT=_ zitb0>PNOe6dk~%$riJEq^ z&3w!C7Jg;Y860-%<}qfcFYGhLX#1d_8)80xEP$Q8bh!9!Tm#Mw19X9{XK~OeL_8Q=Htfr z`jFgK#&Q&<=DKA2Mo`s;>sl3%)Q^yI(JxlZCu4bMiLGnByM!~ecI$AN#N;G1mA9=p zdFG78g-ffDV=fvJOM;wMW=yBlMF1)SM{Yt3#90JuTMzMVaMDxdX{8%hN4-wa^l9!u zi{FGF@2)4`fN9)!^o6471Lrf3(l0S>TkzkFx~0tzA$~pepMo!`Uk0OI4sG5e&7P1P z<8ec}{TUPML#R83!8-^ta|g@eH`6=|ID&eXaw%6;T2~t=4&DpwYBzBNU%NNNBZ0af z1wY|^15N4kgodV$5y2{&$t+^MigJy)l2x3Q}`)XFqPW&1RZqR>lI(Zr+IZ ziUT%!I^=A{&<~i^l?%wC+Sr&d_7-e^!w7Dk1`p@b+)ZxgH-&TKuAg3jG%;aH!?Xg^ 
zsNHm!rsdP?P7kM-Nl#apJU!_N`p*ZYmb@oRW=FJs17$>b+~ntrp2M^YXs}&%XiBO zHa$I==i=hg*WH`uyK&Vqg?6aSPWGYMOb?7shnuybI<$L~_%3d9JXMCLSszGq>5!^3 z373kiZql0JLaJ?cubSW!BJO^lS7lwNH08PRbU%qlqc%u0IH$oi!`0188*?*8$v(Jr zZ|1w@TJu{S=u`oS-GAMDZ*Hf=HeV|Je<<*~j`QvQ{pZtK<3lLf+}g&K{pxC%{~?tA zuQVgZp<#EtG)1Pg=r4_s7&O_&kG3n}w8jI?eSS7gc37LceM#@{&ddsKJ-hr}6xc<9 zT@=_wfn5~XMS)!u*hPU|6xc<9T@?6#i30xzzpWb7J_~HV00000NkvXXu0mjfNqm<- diff --git a/docs/img/scilifelab.png b/docs/img/scilifelab.png deleted file mode 100644 index dfdc165a8d21412e18b9d4db06c4c1b882860b88..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14568 zcmeIZbyU>d7B^0(ph!22NJ0Sy5f8XA$Rih?d08u}sX zyDJ_J>aSr>Y7H71PEmlqk+-fjj0x)LZtvs*Ve z?NuPKxs|Y{S|d5Q_U*WQ_~iKeN#MP=s@mVS*48UtNKMUSNOJ%kB5kevxV(C4yHB4Wyh~D*?wqj_;O| z1T-y?lhVD6)2g93trGV_&HbC5FPn2WyyD%Nq^Z^z9v(FLQkG$Uzn=TiO_5I{hH=a~ zSeZ1v2mZ0$K!3uny^FE(xm?VH*VAU`#Sce4?eG5I6bV^{rCiuEDSJnRJ0|>9xsfHa z*)KHAYVSO6kl_W9UVnHL2`A@GchxghyCDLIG2=os{f3r@PTtjl#&BFEH1q`~l}5f0a`6Z+X1o!Fgha zVJ3tnyxu((79*#a4fhqI-=r}Q*qREg3yHMMFap-N*|*Qw^f<~ZGYTtFOSMg6f6;0F zdYtjagV!3+i`)GQZ&Z~-vCddQLpDnJ*+v3FZDjT2P zzGoC)I*pI|q5JWjy;g5Z-u_pbG|IBp+0cBUbE{{JMQ+L$c0}_1Qbh%{nune^UG*eX zb-id<{prykjlIS`QKC9AX#rm+y{R!Hx9>TBXM=_EpFFGkK6TSc8_H+M<8y=M zwW521yBe~#jXQSSFI@WApQ_Q#Pqqx>ME*T8Tr16*7AHrvs!{yoH!=T%QFKB?8iZDg z;j?|od#x8jWtvP6)QLZN?Z(H|e`=pG&2FF5GvLGSDOxkA4}Rz2YQr zG0=28yH@TxEvBMontUg0^h9dXb?|95&kU=V#dNxB+Q)m>jqk(EUIk_i<*3=oc-l^@ zLzp&+j!&w)R|wDteDjbZQS^V+SLyY0UW~Qj+{N=k$#4KndxaRMDm~{2~|{ zaCdp7Fh6q(6s7pOm$|mT@zocvB7brG;V)W-YUR{?OypBfeP`a`S~o}1Q`Z0Z6m9my ziEaX|VMS7i<&l@-$2Pv_!S}tRZB+NlM(U36(>+EU_qs%%kGJKT;IMEsu&Uix=;tbc zg!M}(KcLK_SDV*ZNf5Q}FyCXM%9uA9#wIPOd5O2KPpT$lFy+7&BVY3IOkesAH10y% z&#-&HnE0vj0n1o-oM(Yp(wwe04To@xu2sn0o{F|=F9xa?VfCp4VUj|S!F0umcsU~v z8R@7*k{7r}?7VtW&+)Be2N@E}{mi+^q(WmgWkT~zK1d>?9f&JKwH`ndFd1!5eX%#e zNpiR^nO^HL6x(V(0~&Y-z00~!H97ixg@N1n5#D{F7v1Aq(`&mM0l}2?W>;M_AQQiv zQm>IbARy_Z8tCROW&kNdqTNr;KP0euLG zEt1ftDmEAhP=-$JjS4(BFUI7}7CB^H^p$=>8Xz1wBTK*AGI#l@u_bKWpq1G1)1+D` 
zl?*#g-yF6(nct0n2TrD33@JZl3)Y>xg#pj$5bA(vX6&aiW7v1AmayG=mY&QY=?1Y# zzuXyG@*#Ni2|X0vlgoukyLL6!5xka3WKCm0>f@xC!nBWr`P!OYGjQ(%uZ;|YvW2u? z*%!PkbE*ap`(P}?7OuO-RztCuIMMjpuH<7FD|&7vhRh5bYid_K;^@e#q_cqY2n8H>SU!h=RbpV6gOwZ-Oi zll$>szjrLuK9v)kBB|0YL)U4kR|5d! zipPQMiZv+|6zOUR6pQaYi8i2D=edetB~ndDe2X-J?%7stA{RfDMwT++g@n~Nf;bvg z;&0RlxNRgrJKdiHlRv}4w0VL3n^d@hv%-4u^%-Bw6J8s7%gQj?;Pvzgtg>+6&+q#r zqgn3SNc7&!2&t371Ex7Oo@qk6UoEKyHn@7t-itoPqNu{c z2axIO$WaR)fg>~%qRFUFEdl9r<@c#XDYV0JCXXU-zU;b{T`otZlOY6Oluo=A8$zj09DWQw|=fr4UK=`-Ms!{0mTQepEZ=1pNEgv)s^?(TX=aZ`l3Mo zQt1EM!b=|&i1F$|yxe^}Z6S)j5I64!{|;ei`%inQkEhFTbL?z+AubSCl&BY~SN{L# zQdw0)`=1uK6gW7!LVveHk^LW%-cI)aBI`eNyRG?c&c7>yYW@%Ie@Opr`)^^Cl!k_w zg1fEHt$L~ol7QR(#q8W|o$SQ^d=;@4wSxd{xJ7Mjf!uiU!ru6PT|&&MP1hmqTH z5kpynV%GZBQBeTDVNu42$$LVqz1=(B9_J!E}?g-WHPnE5swVMM3HNpS#sK3jd{x72? zWX~@O5drdZivoeS11@6E4dNH%=N1+L3IJ^cAR;!PKj!^|?&WUp4YT%y$U2}bMOlOL zpx@RovH#(c<3E*wIYMrU;u8RJ^9ghF3Fz|+iwO#e0R_4E1jP9G0K9)$%zHbp|2A2I z_y0qR#BYIrTL@6?{*<8>7}R>j`_F>)7iqUV{y+TrYcT#F_JD%^Unl<+zyG7_e{}s< z4E$HZ|MRZ@(e+<3@Lvi4&%6GAql@5QM;?e9Dhz_54m4pZ9qp*Y5RQ$SvI1HR+6LN< z`j7KnR0%#*#n=lCjpxDbFS_45XV?8<}p>sB_*!PnQJwt4M9VmB4pH!#z%J>#|0uoh-b74!FJRC%+c za`F<%I^-tfZpF~^Q~k+L8)v0Nk8|lWz+#6e^c&i3w`^ zyG)`LM~wCK(pEW%k#lV3)_(j$yGh`}ntNfRpN6yBAB*Me4mLs%=lHEkVdftih){*3 z>tCx3qQ6ze3)THLSyJ>|e_hhq$cq+*!8(*`s}ns}{pp{|BExmb+_Jx0?(^@-W6xD1 zvZntib}v)kjfbxVqaw?;-xWn*hK$y$SgyUK%zrd$U`+3pP%Se33!aoE^;ha*1C(Hr(SFp zYp2@~SbFN38}=H(fMBu`D$q+br9jsUTMMR_$WvWuspENq(&R_3%t^#90rDpnL$e`k zT3{2w{0KIP7=&pg`bKd9$4Y@UE!)+66lDh9n-ZK}Z>c?XjKQ)~(L)!hX7W8&sD^?y zapfZ@Iv;KCO2DZCFB8E{|@OvQfGa%4If{3IbnWGeAKF{E{C5#@G<)`&UQa8`b?wCWA7`qtfR43aWeyt&YX4qP_a;@YR^l?~X{;^XAccyZ3U0 zQXeh%?8{@B((Bue3!hMka&Ky^(hz5;jspBB&us*FIB14>YKJ%!;Brd@TBzk|H?0E%#RjGXThS zI>%Gbo-}LPM{k+f($Aj2pt_ywyNGwYy2j|b?OZiHeDin>?$iP@F&574s#_sfy zWfk33dp68Mu=${@8Q;aI%QgmuT!sJA)!6K1oo3ok2Sg0YnRIiBlE7j8Vd(X)&&k8) zxLLKJV;RORnrxWvnC@{s(PkS!;7v@6m#7t1 zB`=SlCvXREodfX80AqI=jq;0!xYYwIaKX_lgX-PnU38Je9$TRqEn)0Ay!v<4Mz5Qb 
z-{T#LV)=0*Uwu|wt~6nHE;QPi3Fz|u_fbX8Rq>-nnZoBAC zVo)s!bw4Z&YX=`00z54dgX8GJck8d^h=SKXB0yHN6gLg{6C_mc;nJij`20=EVYA`$ z7^dXIY}rk(Y6bQP_E7V|maHZ=umAl8g_%`E7Wa+Is4%hT&qZ zaFiw|SgaV`m^x7h0*H5rY!q_`AcC#_vrx_@l4hvG^pgEs&YJl~A@(2z@Lg;vjPfHn z3H+@NVvw_j1-Q<6&!ds;OW+&xa9bLyh3{B&_dKWV5$BL5TRkg`712sfk>T6fJh(dI zfR;=#bSD8Ki9AEiQbTn}2p)U)%TOiZ{2dxE$#UIy$EM|7tji(w>K-t3F7VLf{T#SO zDcqdaJI9S?M6yQnZUP}%jc5Fo7dMmg89#^1?qo6Vt^QhAr(YDP zbBc7|{ibV7zGg(mHhP~$D0R>Re1W;-aj`tQ-pz(S2;clg1kzli^cal5IUU$#=b1T3 ze|R{UBqO{C@MGRctGPIuA?a>attO%erMS|a!GOlT`TX?$_lAjj zdEk<()gN(umUsOq*lS%w(Jb2D#Y$pNFgjq+FWHF*{acl%R3uD0 zBtt7sk`KF!8HgLU=^uMxNfNNHW~{Lw1(9rs?||>-3XMyeHjk2lXQ7U7+KUy%YMJ{! zW7M$~UU3u6kVR66l=sb_V7bv`GJXtH@BZ0Rrh9Sq87Io$9T^W0$`|n%K4eJrEH?sQ zEH*7`VVuX#v@q-y!rgcQUNm+&Qi*CDP0rz4`51LVJ#`ky*6OmJq+vROpk52|yR%?Fsd3r%nt2byh04`ks2TSTMT$G^ zvg!K;ItIp-)>qV>u4|X-3KowRsy^RylN8fZ;CZ5!L91j=aOe^OCEefUt?P7Jxdf$t z<32P|ZMbC4*6LnRKQvC8+GPZf4b)*J;Wca%TG5g;s3~Oi=KQ2g>?s3{oU<|4F}{Jz z)2b8?b{NL9;_p7Npq^$<0_KSQ;M%~oEknKH6tAM z?O`APlY61Hk}9fBU1w}mO6~ZEkChmvKlIIaXWBg^5^H)oZ`i)1e11Rlf@4K|L(#fv z=+HauLUSVGryj`}`&SG<;KsIfq7Zo7PQH2e`xn9^xjxR39Cs=qd1s*42|>$eSY~T- z;0p@ms*BW>CThb5e!B}SnQ22k@eg#E;tyzYmYQR=y>SiJf9)xuryxCz1`BC$2{Jpn zuGjw1d$9)VJR|Juq<20i^Q+L2803oAx|#$cU-<)mnMch(A}{)J8pRiF{A(tEwZ*2Pr4Ikkf44WN11`ci;cW8-#Sk9k}T*Xs}1;2(%MNXH~ zk&XuZFe%W&9)k~wqJk%cS}%XOkFlxyW!t-Jra^=Eu9MV$xr z%4x~XXBn_E+5-hUhHbo7MPCpUXMvxm%?O|J^P(d-m;ofN`ejF6%W{E|`CU%s#!pJ# z&kOgpashw}vWum$!1Z1I%Gqy32_MxXAmTjPTFUe<^Yse`G*Kaz-V?i-=u8~k;wSH3 zbi2d?!zs@LnHCFX&>X&n5=`X%l4l|u_<4-S^ZtaD>KRLz!9nm!l|Yog^EnbQ-n_FU z`6<0|hQSZBUI@o}F5nI^hK2s0Z!tx1Rji4ATpoLH(W!#=MjclqoY z`Q}ig(uKa+o+7$1vR8uhySF9Lv*yYr$K0?HuEKX(Bq@U!*DZ;B?8!J5n7*3D^`;kT zC-|%cv~p-fUE8C?n(O4W$X}#(G))YO;K38m1vdPaC#qCP<)yxXVtDtWoYhHULlb@Y zK{#%6g$hf!6a^)(sFM_$e?<(o7-EhDb08?w%D4!lo0t|<^s31tpG`uvuLR!cm8;IK z0S_R85-ud^uYAX@*92GW7n!`3iXN)CWMS}e7um$mEk{Vg1GFQ@Y;lpq$QN;0+pzjR z81V6P(^j;dW2w6*u#}TQ6?iLVH!76NdQ89!m@nCEMtCrroU?-^Wfbz@`~E8?1x985S(Zah 
zyEiwmme#{|QKg-zgRr|xWUcIthz?uhChQzNq=^GBJHbAY+hFxj+vY5W+&Z6`Tn=A2 zA7$O>FTsJAm)HYm^;`qY?!b!KU2=iZdUuoC3fb#YEqapPQmZ6;0m$KJ#jEkOpP0teD#HQ<3K7t%(%Yys^DL8)cc14E} zQ?BRrhDWM)CDP5LoB zbjwl1&s7ZKh|R4FHLI6~fPtffkzwY+7JV?|o@)4_e%tlp{LSNx_l@vltfZp@#r)?E z`ETL21P?jR^NBN6SFWSIx>O!dOYo4_kc1VZ~N8M38h@q zf~w<8#r<3lF~O3v@6F26@F8jyud(-=^QnCArIbz;L?pfWnnjI?vw zQo%l#D6y9S!5B8Ee?$SEt3GnTs}6tB?1GS_;ARVO>^Zwj+A`E^=dd88 zwj!>{Z1v6BL*&qN)pHPdJ6xxiW`&}jyGX9D9YT3o*mvzo`NcHY9A89l9P=W^tkq26 znz}tOL=8IgG>u)FeZtJDI4;P@etNJI+mN7_H6Xy0xCUG38!B+6q}(W;d8^xxfG{ba zM$6H8`Vyzb=Q2Fh)ywH7Cj(XEw0)+S#;5dO@RJ&r^fUYd(FnYsQ4YV+9zD?a$z~Zy zsQo@Lhzcy9ker#S`v@i=5B7qg;i^xkR^%eepy}~F+5F90bP=T5^tyv3;mv$g>~_RK zd_`3v>X8M!Y@ctXaqWqZC5nzEI({S*50K|H5Q#tus6(F*kfvuTqL<7jBNSF2GxxaC;R-AG zX{gL|irS%ym}AUIK{xBUh*pDwc18Z0a@wW9*zqkq@)XL;&HXwM@0_O*Mcq*lqs~@FseZ)TIE_ig}r4`$<#Dv`TsTN7jgDv=m* zg4RE1oJpG`58Uowb>g~b#i{L$P+;oUlw@tlK2um56osD?+{{K?TJo>oKa((iB~yVN z^RCNLIKKri{T}nNq9_l$j1KC6-onrQh+^r@Vt%^7K@tIXrwY(|tz(77w6tLFp_d3J#zWK2UB3ppGPCZs}-NHMU)>Ww&# zc!y?}C-yx?72h@;Q?kMAbsH!FGx#Nhn1N%Rd zog!wuiDEw5PY-f65iLJJO` zlYzByn{xfwc;8Cvd8Pd7Z6n2cJC4+CRIMV7T=aY#n7W=4NgTH^J2;$lFQo!1%eeCk z(^sAsIa2@Wolfq&FIHFSrW-2f(ZfR3(M{DAicdO5dEWYDs6vNpmiP1oEoPBUDcwCb zcsKR=7FW%s^6c(7M7M$3qCK0geQ6FI=K^@Dn1VW!vbOw)=N-``NwFvKH&iIUpJE?nl2+p7*~fZR zg#O(m<{`uRfC<7;c94e0bWN$m4s31-?p}P97T5)<7z0{=gVe>4u3Hp`a`$Lgj4JYH zhM@gq`))n2&Pn-DfDV=TV1ov!=kq1+RlnE<*J~YoB^>LFe#+f7Xyr3w5o{XuZZ6lo z#_dxk#Me1y(oc^%`JCbT?)R3U4KF1$hd-L7)=#gif1X^x4CD zy7|u4fm4FDtwVkuxp{B1^UZ=&B5|ZZg^(aU?f%=_EK+^*c&ndABNq=^Qoxsn${85S ziy?6$@~`M6IV>s%^+z$oXg!7rBUu;q!Xg1rkmhBto>4^kXGt4&NUHLe<#8E+liNFA z;@JZBLPIq3$%GE9$?r>dDyFjsSK9_sJwqkcr1rC`d5hBIZ zyd4a#3YLVWTW$vJN>SLz3Y%{PV{?KBL>~Jyk*6Y10X|OnJ@ zog2eY!N)j5a`*~svz%v24h&-kZ7;+Ee%WCs9vbGbIcoFlU)Q^+so@>GvK$F$9FEYE zD|e4lyh#*G{tzo}X%V-9V3104476`&4<5k^I4t+$Tv;1%O|y}6$b1c|QSHa&cILkad>)Pn>N)xZpBk=pbfNVO*8gls z4;Ehp*yHTSIITa#Klnl~yUQXPTf%_yyg=5p(xU>_RJrmS1#mhlOTlSWRQdJr!r&%^ z76vbP-u|@+-Y9pUCIVhgYop<(u0SyrSIRMmpikpmHXWus#ons~gNHxRLtXg?sLPvy 
zamWwPEl0iu*wF;+>+NTDqyuWW#dH`Kh%7O(-&r0oQNItx}asfewv!@ zZRT+i+>kV6TTMFL*R9~(j1CYqo=~+ThLrPW>)e5h@vR=uQBANd*W#vHU<(zX8~{C? zso;K~9qkX*8Z%1IU?HR!FXF|I0c;RT$lWP$Py+Xo3kh?+7iw4X8Y{3&3i6=bp7JAU zSSTuO(UxD9^f~_ofvxY)5x-^_U%CB3(l+NoIUtC)kxmTbr1(jG#F4v%e_gdVBKt`SA zwkEUCk4It3V4+V~Qa%l#80k*|05h598_cbmKWGgk!}K<)RKrrsLc4qKm6u1U++I8TwPb zQCHjbwP|ETsmsWsGz{|^OX3~;h;E4Wr=znaQWm^zY%^O%Bq6_VPz!34+{Kb8gL9=f zqUns5hKPU2_}%&;&1*J6Xt9?19P!1IirDQ`O|OFbA-Y0!N}Tc)Jun zzF^ZqDTb4U6Eo@|ne@=FmoO*&q!-?3P9|FpA3u*i*ChE??X5mvL$p^5XJG8a?RI~6 z+1%rgbRo$cSGZ>Zi5joOU=01L}$@`0~I!yU;^Ag)i$w)={ zCW$=E{BqF>uhaiS3$e>MXH4L0SdL=-nfaC6q5yKV!1F@DJt9#ynD5J~1U4~QN(wV1 zCoa*?Fm6R*c?+X8_yw}q>@9S1s~Bn-TI3@M-wa0tGbesZ1so3VV0CkxDkzLSo!A|3 zSB&AZuac&%OB48hk{ykm*FaVo19f~73gF+TjdtU2eZTKL`Q*|PBYOt8L`#@+*f zK&_(aUGyEEqHkwfzUmej*BiY&GVVDIfn4+5bzC|+xl8~|!cYD89v`@8V%T|@vQ}Ws z2W+vz8<+J*ju9dCw#l6tSIX!YWLcp<=>n`nG*^;Sgch2_ElXnw?vsE=fmc!fvLu8Z zQizHxOWPV}@W3e)~(8DXGRV zWVieyHtvo1UgCOV>WUV8(4;%%Xdo)IW5AIhDg<0N=|u&uEj@x##(e6Z3K=Fz8}ZOY zOvgNW9FVSK&WIVQXlF1{_~cW65;$OY(?<-%h%VffyX#giM?$fb6J47O=res8$`6$o za0fW)Evx5D_C@&z6BZ^zO9I_XW%Bc*fmMw2cx_FJ^aD6JBV8mPGES-LC__1JX!$?Z zAh<_UJOl>bGnc4@^DfrjmF7}X6EHyqR-bj7AJ}n??jtXC6^Prm9VB&_zm$U= zFXP!C3k%_pGz@5c=Vj3!!wo|R{HlNUDdwJvm*TRc74D0#^|g4EKZ;u8W=0BpK!g+h z&W&ES0YBxCPptGvsCSb~TF8H8H>(AW8v=gO4|K0lG4U`KpCJ?xnw>GPg~R8BgGKkTlS&0S|CR!j0(RBiB1J_+gsuOm1 zCS)SL??Yow0E}@5`tu}f=yP>Zqnip9*pir+<_axZl|R**rj$!J?Lk^Um=R%sX-8%^ z=}@x0u(%)-`;@tkOmjjK!S#8z7F$t|sv*-yCI>V--ZIQ{oC}g{v3Oyii@pQI0ep&f zjqVI+MaWxB5Y6t^mCKzMSAFM-+z%43?2R8p@~eMzu@kTfFN^~)u8X|XHzsF!iT^e^F>f+HfyTPovWrTKkt3BWYKN!gVO{kdb&Pk4$70@K@EA$_DyCLk9im0G+b h3jN From a0193ee359f9d3cc9228ce39b087305576be2f6c Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 2 Jun 2023 20:50:56 +0200 Subject: [PATCH 29/72] Increase Logoheader size. 
--- docs/img/logoheader.svg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/img/logoheader.svg b/docs/img/logoheader.svg index 8adc96b..737aa3d 100644 --- a/docs/img/logoheader.svg +++ b/docs/img/logoheader.svg @@ -2,9 +2,9 @@ Date: Wed, 14 Jun 2023 22:01:24 +0200 Subject: [PATCH 30/72] Remove a possible .gz extension, if the --gzip/-z flag is not active. --- .gitignore | 3 ++- README.md | 4 +++- docs/.DS_Store | Bin 6148 -> 0 bytes src/file_io.rs | 14 ++++++++++---- 4 files changed, 15 insertions(+), 6 deletions(-) delete mode 100644 docs/.DS_Store diff --git a/.gitignore b/.gitignore index 9b96c89..f8cb365 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ target/ *.pdb # Test data -test_*.fastq.gz \ No newline at end of file +test_*.fastq.gz +.DS_Store diff --git a/README.md b/README.md index 74e1133..4c19d54 100644 --- a/README.md +++ b/README.md @@ -152,9 +152,11 @@ The argument `-p 10` specifies the number of threads that each `pigz` processes Finally, we can then run `umi-transfer` using the FIFOs like so: ```shell -umi-transfer external --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq +umi-transfer external --force --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq ``` +`--force` is optional and skips the prompt whether existing output files may be overwritten, which will be triggered by the prepared FIFOs. 
+ It's good practice to remove the FIFOs after the program has finished: ```shell diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index 4a1f78b6babd0011724029c032fc71193bb32ef2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOKQU~5S?jKFm#hmAkdAjkQ=DMJ%KM!l7L&VqY!A<+Vx~TORv#4nhC+g?b5Vm z2BbHdpX>*gJVeBs*ZrDkK|~dXAd500VjgwfSnw^7rN+A1rT#cl;`f^5*=O`ZJ+)nX z_59}PQ(bSnv_)){mhYeU%a4akUZ3_qc#qHDReftthb|pjR04Z0&VV!E3^)VMz(fq# ztyA4iw4ZO|3^)UShXFYs0)}8T%!=jcz*JfQ;2h>6(507NRTvb7kj z>97Zji-uWI(}}J5VB7iJyl~ka^+O3Kj*31z1J1yZftfBBa{oW#lNoLDhbg{t2AqL^ z#sJUjrrzMC>~6h!J-KTG#uJ8!#AQ(+&<8&OSjaiDDo*tW(GeF7v!YZHdr1fSk3b>B KCud*+2EG7be<;2H diff --git a/src/file_io.rs b/src/file_io.rs index 8029912..bc40c14 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -129,10 +129,16 @@ pub fn write_to_file( pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Result { // handle the compression and adapt file extension if necessary. if let Some(extension) = path.extension().and_then(|e| e.to_str()) { - if !extension.ends_with("gz") & compress { - let mut new_extension = extension.to_owned(); - new_extension.push_str(".gz"); - path.set_extension(new_extension); + if *compress { + if !extension.ends_with("gz") { + let mut new_extension = extension.to_owned(); + new_extension.push_str(".gz"); + path.set_extension(new_extension); + } + } else { + if extension.ends_with("gz") { + path.set_extension(""); + } } } From 7d1d90b069a3776ad35f94d99db5688e27448e94 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 15 Jun 2023 15:41:44 +0200 Subject: [PATCH 31/72] Add the first unit test. More to come. 
--- src/file_io.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/file_io.rs b/src/file_io.rs index bc40c14..1c7a94d 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -168,3 +168,31 @@ pub fn append_umi_to_path(path: &Path) -> PathBuf { let new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); PathBuf::from(new_path_str.to_string()) } + +#[cfg(test)] +mod tests { + + use std::path::PathBuf; + + #[test] + fn test_derive_output_name() { + let p = PathBuf::from("test.fastq"); + let result = super::append_umi_to_path(&p); + assert_eq!(result, PathBuf::from("test_with_UMIs.fastq")); + + let p = PathBuf::from("test.fastq.gz"); + let result = super::append_umi_to_path(&p); + assert_eq!(result, PathBuf::from("test_with_UMIs.fastq.gz")); + + let p = PathBuf::from("/some/path/test.fastq.gz"); + let result = super::append_umi_to_path(&p); + assert_eq!(result, PathBuf::from("/some/path/test_with_UMIs.fastq.gz")); + + let p = PathBuf::from("/some/path/test.something....fastq.gz"); + let result = super::append_umi_to_path(&p); + assert_eq!( + result, + PathBuf::from("/some/path/test_with_UMIs.something....fastq.gz") + ); + } +} From 32067efb0fd00c43c88776e66136500a203fae56 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 15 Jun 2023 21:34:16 +0200 Subject: [PATCH 32/72] Added more tests, this time for check_outputpath function --- Cargo.lock | 8 ++++--- Cargo.toml | 1 + src/file_io.rs | 61 +++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 02571d4..ff14c15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -972,15 +972,16 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.5.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" 
dependencies = [ + "autocfg", "cfg-if", "fastrand", "redox_syscall", "rustix", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -1044,6 +1045,7 @@ dependencies = [ "itertools", "lazy_static", "regex", + "tempfile", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index dc3cc71..d02c219 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,4 @@ file-format = "0.7.0" anyhow = "1.0.71" dialoguer = "0.10.4" regex = "1.8.1" +tempfile = "3.6.0" diff --git a/src/file_io.rs b/src/file_io.rs index 1c7a94d..002fea4 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -172,27 +172,68 @@ pub fn append_umi_to_path(path: &Path) -> PathBuf { #[cfg(test)] mod tests { + use super::*; + use std::fs::File; + use std::io::Write; use std::path::PathBuf; + use tempfile::TempDir; + + fn create_mock_file() -> (TempDir, PathBuf) { + let temp_dir = tempfile::tempdir().expect("Failed to create temporary directory"); + let file_path = temp_dir.path().join("mock.fq"); + + let mut file = File::create(&file_path).expect("Failed to create mock file"); + file.write_all(b"Mock file") + .expect("Failed to create mock file"); + + (temp_dir, file_path) + } #[test] - fn test_derive_output_name() { + fn test_correctly_derive_output_name() { let p = PathBuf::from("test.fastq"); - let result = super::append_umi_to_path(&p); + let result = append_umi_to_path(&p); assert_eq!(result, PathBuf::from("test_with_UMIs.fastq")); let p = PathBuf::from("test.fastq.gz"); - let result = super::append_umi_to_path(&p); + let result = append_umi_to_path(&p); assert_eq!(result, PathBuf::from("test_with_UMIs.fastq.gz")); let p = PathBuf::from("/some/path/test.fastq.gz"); - let result = super::append_umi_to_path(&p); + let result = append_umi_to_path(&p); assert_eq!(result, PathBuf::from("/some/path/test_with_UMIs.fastq.gz")); + } + + #[test] + fn test_check_outputpath_existing_file_with_force() { + let (temp_dir, file_path) = create_mock_file(); + let compress = false; + let force = true; + + let result = 
check_outputpath(file_path.clone(), &compress, &force); + + assert!(result.is_ok()); + assert_eq!(result.unwrap(), file_path); + + temp_dir + .close() + .expect("Failed to remove temporary directory"); + } + + #[test] + fn test_check_outputpath_new_file() { + let (temp_dir, _file_path) = create_mock_file(); + let file_path = temp_dir.path().join("new_file"); + let compress = false; + let force = true; + + let result = check_outputpath(file_path.clone(), &compress, &force); + + assert!(result.is_ok()); + assert_eq!(result.unwrap(), temp_dir.path().join("new_file")); - let p = PathBuf::from("/some/path/test.something....fastq.gz"); - let result = super::append_umi_to_path(&p); - assert_eq!( - result, - PathBuf::from("/some/path/test_with_UMIs.something....fastq.gz") - ); + temp_dir + .close() + .expect("Failed to remove temporary directory"); } } From 43cd017d41ad3671a932c47e42aba30b53292c2a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 6 Jul 2023 18:06:28 +0200 Subject: [PATCH 33/72] Consolidate nested if/else to match in file_io::check_outputpath(). --- src/file_io.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 002fea4..c5f962c 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -1,6 +1,6 @@ use super::umi_errors::RuntimeErrors; use anyhow::{anyhow, Context, Result}; -use dialoguer::Confirm; +use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use regex::Regex; use std::{fs, path::Path, path::PathBuf}; @@ -129,16 +129,16 @@ pub fn write_to_file( pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Result { // handle the compression and adapt file extension if necessary. 
if let Some(extension) = path.extension().and_then(|e| e.to_str()) { - if *compress { - if !extension.ends_with("gz") { + match (*compress, extension.ends_with("gz")) { + (true, false) => { let mut new_extension = extension.to_owned(); new_extension.push_str(".gz"); path.set_extension(new_extension); } - } else { - if extension.ends_with("gz") { + (false, true) => { path.set_extension(""); } + _ => {} } } @@ -148,7 +148,7 @@ pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Res // return the path of it is ok to write, otherwise an error. if exists & !force { // force will disable prompt, but not the check. - if Confirm::new() + if Confirm::with_theme(&ColorfulTheme::default()) .with_prompt(format!("{} exists. Overwrite?", path.display())) .interact()? { From 715da32cad68fcc066c3389ed47d9bcc0303549a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 7 Jul 2023 19:16:14 +0200 Subject: [PATCH 34/72] Move extension handling and correction into new separate function. --- src/file_io.rs | 74 +++++++++++++++++++++++++++++++++------------ src/umi_external.rs | 10 ++++-- 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index c5f962c..5d1b89b 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -126,22 +126,7 @@ pub fn write_to_file( } // Checks whether an output path exists. -pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Result { - // handle the compression and adapt file extension if necessary. 
- if let Some(extension) = path.extension().and_then(|e| e.to_str()) { - match (*compress, extension.ends_with("gz")) { - (true, false) => { - let mut new_extension = extension.to_owned(); - new_extension.push_str(".gz"); - path.set_extension(new_extension); - } - (false, true) => { - path.set_extension(""); - } - _ => {} - } - } - +pub fn check_outputpath(path: PathBuf, force: &bool) -> Result { // check if the path already exists let exists = fs::metadata(&path).is_ok(); @@ -162,6 +147,29 @@ pub fn check_outputpath(mut path: PathBuf, compress: &bool, force: &bool) -> Res } } +// Checks whether an output path exists. +pub fn rectify_extension(mut path: PathBuf, compress: &bool) -> Result { + // handle the compression and adapt file extension if necessary. + if let Some(extension) = path.extension().and_then(|e| e.to_str()) { + match (*compress, extension.ends_with("gz")) { + (true, false) => { + let mut new_extension = extension.to_owned(); + new_extension.push_str(".gz"); + path.set_extension(new_extension); + } + (false, true) => { + path.set_extension(""); + } + _ => {} + } + } else { + if *compress { + path.set_extension("gz"); + } + } + Ok(path) +} + pub fn append_umi_to_path(path: &Path) -> PathBuf { let path_str = path.as_os_str().to_string_lossy(); let re = Regex::new(r"^(?P\.*[^\.]+)\.(?P.*)$").unwrap(); @@ -204,13 +212,40 @@ mod tests { assert_eq!(result, PathBuf::from("/some/path/test_with_UMIs.fastq.gz")); } + #[test] + fn test_rectify_extension() { + let p = PathBuf::from("test.fastq"); + let result = rectify_extension(p, &false); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), PathBuf::from("test.fastq")); + + let p = PathBuf::from("test.fastq"); + let result = rectify_extension(p, &true); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), PathBuf::from("test.fastq.gz")); + + let p = PathBuf::from("test"); + let result = rectify_extension(p, &true); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), PathBuf::from("test.gz")); + + 
let p = PathBuf::from("test.fastq.gz"); + let result = rectify_extension(p, &false); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), PathBuf::from("test.fastq")); + + let p = PathBuf::from("test.fastq.gz"); + let result = rectify_extension(p, &true); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), PathBuf::from("test.fastq.gz")); + } + #[test] fn test_check_outputpath_existing_file_with_force() { let (temp_dir, file_path) = create_mock_file(); - let compress = false; let force = true; - let result = check_outputpath(file_path.clone(), &compress, &force); + let result = check_outputpath(file_path.clone(), &force); assert!(result.is_ok()); assert_eq!(result.unwrap(), file_path); @@ -224,10 +259,9 @@ mod tests { fn test_check_outputpath_new_file() { let (temp_dir, _file_path) = create_mock_file(); let file_path = temp_dir.path().join("new_file"); - let compress = false; let force = true; - let result = check_outputpath(file_path.clone(), &compress, &force); + let result = check_outputpath(file_path.clone(), &force); assert!(result.is_ok()); assert_eq!(result.unwrap(), temp_dir.path().join("new_file")); diff --git a/src/umi_external.rs b/src/umi_external.rs index 0bd549b..848e54b 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -4,7 +4,7 @@ use itertools::izip; use std::path::PathBuf; use super::file_io; -use crate::{file_io::check_outputpath, umi_errors::RuntimeErrors}; +use crate::umi_errors::RuntimeErrors; #[derive(Debug, Parser)] pub struct OptsExternal { #[clap( @@ -112,9 +112,13 @@ pub fn run(args: OptsExternal) -> Result { .r2_out .unwrap_or(file_io::append_umi_to_path(&args.r2_in)); + // set the correct extension. + output1 = file_io::rectify_extension(output1, &args.gzip)?; + output2 = file_io::rectify_extension(output2, &args.gzip)?; + // modify if output path according to compression settings and check if exists. 
- output1 = check_outputpath(output1, &args.gzip, &args.force)?; - output2 = check_outputpath(output2, &args.gzip, &args.force)?; + output1 = file_io::check_outputpath(output1, &args.force)?; + output2 = file_io::check_outputpath(output2, &args.force)?; println!("Output 1 will be saved to: {}", output1.to_string_lossy()); println!("Output 2 will be saved to: {}", output2.to_string_lossy()); From 29c34566c391435d4cceac10346af011d2b8edbe Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 7 Jul 2023 19:52:23 +0200 Subject: [PATCH 35/72] Integrate a custom header. --- Cargo.lock | 431 +++++++++++++++++++++++++++++++++++----------------- Cargo.toml | 5 +- src/main.rs | 13 ++ 3 files changed, 308 insertions(+), 141 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ff14c15..8f7fd6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,13 +10,62 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aho-corasick" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" 
+dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + [[package]] name = "anyhow" version = "1.0.71" @@ -33,14 +82,18 @@ dependencies = [ ] [[package]] -name = "atty" -version = "0.2.14" +name = "assert_cmd" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", + "anstyle", + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", ] [[package]] @@ -90,9 +143,9 @@ dependencies = [ [[package]] name = "bio-types" -version = "0.13.0" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa990f40a28735fa598dc3dd58d73e62e6b41458959d623903b927ba7b04c80" +checksum = "c915bf6c578d40e1e497f8c571a4514bc89c3195cec2abb8be6dd5500405c752" dependencies = [ "derive-new", "lazy_static", @@ -122,6 +175,23 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + +[[package]] +name = "bstr" +version = "1.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bv" version = "0.11.1" @@ -158,41 +228,79 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.2.25" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" dependencies = [ - "atty", - "bitflags", + "clap_builder", "clap_derive", - "clap_lex", - "indexmap", "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", "strsim", - "termcolor", - "textwrap", ] [[package]] name = "clap_derive" -version = "3.2.25" +version = "4.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008" +checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" dependencies = [ "heck 0.4.1", - "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.23", ] [[package]] name = "clap_lex" -version = "0.2.4" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "concolor" +version = 
"0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b946244a988c390a94667ae0e3958411fa40cc46ea496a929b263d883f5f9c3" dependencies = [ - "os_str_bytes", + "bitflags 1.3.2", + "concolor-query", + "is-terminal", +] + +[[package]] +name = "concolor-clap" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435ff0007a3bb04099fe1beedc6b76e7dd5340c90b168008ac0d7e87441de1bf" +dependencies = [ + "clap", + "concolor", +] + +[[package]] +name = "concolor-query" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" +dependencies = [ + "windows-sys 0.45.0", ] [[package]] @@ -219,9 +327,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b015497079b9a9d69c02ad25de6c0a6edef051ea6360a327d0bd05802ef64ad" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" dependencies = [ "csv-core", "itoa", @@ -267,6 +375,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "either" version = "1.8.1" @@ -368,9 +488,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", @@ -412,18 +532,9 @@ 
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.1.19" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hermit-abi" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "indexmap" @@ -437,13 +548,14 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.3" +version = "0.17.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cef509aa9bc73864d6756f0d34d35504af3cf0844373afe9b8669a5b8005a729" +checksum = "8ff8cc23a7393a397ed1d7f56e6365cba772aba9f9912ab968b03043c395d057" dependencies = [ "console", + "instant", "number_prefix", - "portable-atomic 0.3.20", + "portable-atomic", "unicode-width", ] @@ -462,11 +574,22 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi 0.3.1", + "hermit-abi", "libc", "windows-sys 0.48.0", ] +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix 0.38.3", + "windows-sys 0.48.0", +] + [[package]] name = "itertools" version = "0.10.5" @@ -487,9 +610,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = 
"62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" [[package]] name = "lazy_static" @@ -499,9 +622,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.144" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libm" @@ -515,6 +638,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" +[[package]] +name = "linux-raw-sys" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" + [[package]] name = "matrixmultiply" version = "0.3.7" @@ -648,9 +777,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.17.1" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "ordered-float" @@ -662,16 +791,16 @@ dependencies = [ ] [[package]] -name = "os_str_bytes" -version = "6.5.0" +name = "owo-colors" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" [[package]] name = "paste" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79" +checksum = 
"b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35" [[package]] name = "petgraph" @@ -685,24 +814,43 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "0.3.20" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e30165d31df606f5726b090ec7592c308a0eaf61721ff64c9a3018e344a8753e" +checksum = "767eb9f07d4a5ebcb39bbf2d452058a93c011373abf6832e24194a1c3f004794" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "predicates" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" dependencies = [ - "portable-atomic 1.3.2", + "anstyle", + "difflib", + "itertools", + "predicates-core", ] [[package]] -name = "portable-atomic" -version = "1.3.2" +name = "predicates-core" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc59d1bcc64fc5d021d67521f818db868368028108d37f0e98d74e33f68297b5" +checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" [[package]] -name = "ppv-lite86" -version = "0.2.17" +name = "predicates-tree" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] [[package]] name = "proc-macro-error" @@ -730,18 +878,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.58" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa1fb82fc0c281dd9671101b66b771ebbe1eaf967b96ac8740dcba4b70005ca8" +checksum = 
"7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.27" +version = "1.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500" +checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" dependencies = [ "proc-macro2", ] @@ -798,14 +946,26 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] name = "regex" -version = "1.8.2" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1a59b5d8e97dee33696bf13c5ba8ab85341c002922fba050069326b9c498974" +checksum = "e9aaecc05d5c4b5f7da074b9a0d1a0867e71fd36e7fc0482d8bcfe8e8fc56290" dependencies = [ "aho-corasick", "memchr", @@ -814,9 +974,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" [[package]] name = "rustc_version" @@ -829,29 +989,42 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.19" +version = "0.37.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" +checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" dependencies 
= [ - "bitflags", + "bitflags 1.3.2", "errno", "io-lifetimes", "libc", - "linux-raw-sys", + "linux-raw-sys 0.3.8", + "windows-sys 0.48.0", +] + +[[package]] +name = "rustix" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +dependencies = [ + "bitflags 2.3.3", + "errno", + "libc", + "linux-raw-sys 0.4.3", "windows-sys 0.48.0", ] [[package]] name = "rustversion" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" +checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f" [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" [[package]] name = "semver" @@ -861,22 +1034,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.163" +version = "1.0.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" +checksum = "7daf513456463b42aa1d94cff7e0c24d682b429f020b9afa4f5ba5c40a22b237" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.163" +version = "1.0.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" +checksum = "b69b106b68bc8054f0e974e70d19984040f8a5cf9215ca82626ea4853f82c4b9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -961,9 +1134,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.16" +version = "2.0.23" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" dependencies = [ "proc-macro2", "quote", @@ -980,43 +1153,34 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall", - "rustix", + "rustix 0.37.23", "windows-sys 0.48.0", ] [[package]] -name = "termcolor" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "textwrap" -version = "0.16.0" +name = "termtree" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.16", + "syn 2.0.23", ] [[package]] @@ -1036,23 +1200,26 @@ name = "umi-transfer" version = "0.2.0" dependencies = [ "anyhow", + "assert_cmd", "bio", "clap", + "concolor-clap", "dialoguer", "file-format", "flate2", "indicatif", "itertools", "lazy_static", + "owo-colors", "regex", "tempfile", ] [[package]] name = "unicode-ident" -version = "1.0.9" +version = "1.0.10" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" +checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" [[package]] name = "unicode-segmentation" @@ -1066,6 +1233,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "vec_map" version = "0.8.2" @@ -1082,41 +1255,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.5" +name = "wait-timeout" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" dependencies = [ - "winapi", + "libc", ] [[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" 
+name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "windows-sys" @@ -1133,7 +1284,7 @@ version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "windows-targets 0.48.0", + "windows-targets 0.48.1", ] [[package]] @@ -1153,9 +1304,9 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.48.0" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" dependencies = [ "windows_aarch64_gnullvm 0.48.0", "windows_aarch64_msvc 0.48.0", diff --git a/Cargo.toml b/Cargo.toml index d02c219..037233a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "3.2.10", features = ["derive"] } +clap = { version = "4.3.11", features = ["derive"] } bio = "0.41.0" lazy_static = "1.4" indicatif = "0.17.0" @@ -17,3 +17,6 @@ anyhow = "1.0.71" dialoguer = "0.10.4" regex = "1.8.1" tempfile = "3.6.0" +assert_cmd = "2.0.11" +owo-colors = "3.5.0" +concolor-clap = "0.1.0" diff --git a/src/main.rs b/src/main.rs index ff163bf..ed32635 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ extern crate core; use anyhow::Context; use clap::Parser; +use owo_colors::OwoColorize; use crate::auxiliary::timedrun; use crate::umi_external::OptsExternal; @@ -11,6 +12,15 @@ mod file_io; mod umi_errors; mod umi_external; +const LOGO: &str = r#" 
+â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘ SciLifeLab - National Genomics Infrastructure â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘â–‘ +"#; + +const WEB: &str = r#"https://www.scilifelab.se +https://ngisweden.scilifelab.se +https://github.com/SciLifeLab/umi-transfer +"#; + #[derive(clap::Parser)] #[clap( version = "0.2.0", @@ -32,6 +42,9 @@ enum Subcommand { } fn main() { + println!("\n{}", LOGO.fg_rgb::<0xA7, 0xC9, 0x47>().bold()); + //println!("{}", WEB.fg_rgb::<0x49, 0x1F, 0x53>().italic()); + println!("{}", WEB.fg_rgb::<0x6F, 0x6F, 0x6F>().italic()); let opt: Opt = Opt::parse(); timedrun("umi-transfer finished", || { let res = match opt.cmd { From 872a331106a820e2445be8c5d04839bdc1a1ddbd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 10 Jul 2023 15:53:10 +0200 Subject: [PATCH 36/72] Collapse else if clause in rectify_extension(). --- src/file_io.rs | 6 ++---- src/main.rs | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 5d1b89b..7b30436 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -162,10 +162,8 @@ pub fn rectify_extension(mut path: PathBuf, compress: &bool) -> Result } _ => {} } - } else { - if *compress { - path.set_extension("gz"); - } + } else if *compress { + path.set_extension("gz"); } Ok(path) } diff --git a/src/main.rs b/src/main.rs index ed32635..2db2755 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,7 +25,7 @@ https://github.com/SciLifeLab/umi-transfer #[clap( version = "0.2.0", author = "Written by Judit Hohenthal, Matthias Zepper & Johannes Alneberg", - about = "A tool for transferring Unique Molecular Identifiers (UMIs).\n\nMost tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs.\n\n You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the 
IDs of your paired FastQ files.\n\n" + about = "A tool for transferring Unique Molecular Identifiers (UMIs).\n\nMost tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs. You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files.\n\n" )] pub struct Opt { From 8fddde6abf5ead734de4667aac46ee9bdfb178ba Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 10 Jul 2023 19:01:28 +0200 Subject: [PATCH 37/72] Add test data, 10 simulated reads pairs from Escherichia phage phiX174 genome (NC_001422.1). --- tests/seqdata/read1.fq | 40 +++++++++++++++++++++++++++++++ tests/seqdata/read1.fq.gz | Bin 0 -> 583 bytes tests/seqdata/read2.fq | 40 +++++++++++++++++++++++++++++++ tests/seqdata/read2.fq.gz | Bin 0 -> 618 bytes tests/seqdata/umi.fq | 40 +++++++++++++++++++++++++++++++ tests/seqdata/umi.fq.gz | Bin 0 -> 223 bytes tests/seqdata/umi_shuffled.fq | 40 +++++++++++++++++++++++++++++++ tests/seqdata/umi_shuffled.fq.gz | Bin 0 -> 231 bytes 8 files changed, 160 insertions(+) create mode 100644 tests/seqdata/read1.fq create mode 100644 tests/seqdata/read1.fq.gz create mode 100644 tests/seqdata/read2.fq create mode 100644 tests/seqdata/read2.fq.gz create mode 100644 tests/seqdata/umi.fq create mode 100644 tests/seqdata/umi.fq.gz create mode 100644 tests/seqdata/umi_shuffled.fq create mode 100644 tests/seqdata/umi_shuffled.fq.gz diff --git a/tests/seqdata/read1.fq b/tests/seqdata/read1.fq new file mode 100644 index 0000000..d5efacd --- /dev/null +++ b/tests/seqdata/read1.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 +AATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 
+TAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGAC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 +GTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 +GGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCTTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 +GCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFF:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 +TCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 +TGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 +TCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 +TGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 +TTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/seqdata/read1.fq.gz b/tests/seqdata/read1.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..905bb435be0fe0dffe1b37b066be0c69281df4fc GIT binary patch literal 583 zcmV-N0=WGjiwFqzH>_j;19D|yWHBygaRAL$%TB{E5WMF%IC9CWRg)9TimoaZF6{q* z3p+DTC4_jWfYb^(c4Awe$Bub;_0#!ud^+3Xy6gM(B~Gt-eqM)l82W0KWtrFN?K`vd zaQs=sFtfDy8G^Ucl`ff_wY6F zako1R0uHPJoB$7Wz*`LV0URGr#qQ2m;v`<9h^l176HYAO{gO{f2-qQv(i@3b^AWKcqeHY5R8;r<-n^|M^=G(;ex42u_!ilWaLe{zODfJ-&%DN*#SGu_S~Ff&!4ftWH}qyHIESgi V*Cp|{k^V&ibZ-XkVcNF|001hJ6wv?x literal 0 HcmV?d00001 diff --git a/tests/seqdata/read2.fq b/tests/seqdata/read2.fq new file mode 100644 index 0000000..92154b5 --- /dev/null +++ b/tests/seqdata/read2.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 +ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 +AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 +AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT ++ +FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 +GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 +CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA ++ +F:FFFFF:FFFFFFFFFFFFFF::F:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFF:FF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 +AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:,FFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 +TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC ++ +FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFF,FFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 +GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 +GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 +AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:,F:FFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/seqdata/read2.fq.gz b/tests/seqdata/read2.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..4f9ed6d219275aebf7cdf1867820a3fa532940fe GIT binary patch literal 618 zcmV-w0+szAiwFnU9#A{($oK~NrRnm#9@s2y z1C>5>4Isd5nLtny9R5W%Q@!p47h^9=z6!}SOQY1HV790mB0?yjkZ;@%I(@zE`*nt3 zlY1Pe+&d-o6&S`C>_LV8G1%az*0?g)F^S#HH9Ac)l#zQ5&z`*M-mkpCNUye8%akmKo E0DvtjivR!s literal 0 HcmV?d00001 diff --git a/tests/seqdata/umi.fq b/tests/seqdata/umi.fq new file mode 
100644 index 0000000..65c7f81 --- /dev/null +++ b/tests/seqdata/umi.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 2:N:0:GCTTCAGGGT+AAGGTAGCGT +CCTGAGACC ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AGACATGAC ++ +:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TGGACGCAC ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GCCTAAACG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AATTGAAGT ++ +FFFFF:FFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AACAACAGA ++ +FFFFFFF:F +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TCACTTATT ++ +:F:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GATATGAGG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 2:N:0:GCTTCAGGGT+AAGGTAGCGT +CTAAATTGG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TACCAAGGA ++ +FFFFFFFFF diff --git a/tests/seqdata/umi.fq.gz b/tests/seqdata/umi.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..1255430dbd8126f82b879c75367746d69e95c028 GIT binary patch literal 223 zcmV<503iP#iwFpYFsx(%19fd_E@p86mD4*8!ypU>;GI)+@4{QbQ(UXDBBfd4`=6yI zYE^X!fDn)6ll2?sgVt?bo*O}}g-4Fys8LA!*GOi(lt z)YHd?9@|gB_t;b?`CM}}1pRI(>XA)y>(~MFd2@j>6d@ODHY@_Bq ZL^~V9)RBn(nR>-P`wcMlVUm>t002AoXY~L8 literal 0 HcmV?d00001 diff --git a/tests/seqdata/umi_shuffled.fq b/tests/seqdata/umi_shuffled.fq new file mode 100644 index 0000000..bcfebe5 --- /dev/null +++ b/tests/seqdata/umi_shuffled.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:10068:1579 2:N:0:GCTTCAGGGT+AAGGTAGCGT +ACCGAATGA ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10095:2033 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AGCTGATTT ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10104:1391 2:N:0:GCTTCAGGGT+AAGGTAGCGT +ATAAAAGTA ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10140:1297 
2:N:0:GCTTCAGGGT+AAGGTAGCGT +TGTATGCAG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10158:1705 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TAGCGTGTT ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10194:1517 2:N:0:GCTTCAGGGT+AAGGTAGCGT +CACATGTTA ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10194:1986 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AGACAGGCA ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10203:1251 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AAAAATGCC ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10203:1971 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TCAAACTTA ++ +FF:F,FFFF +@SCILIFELAB:500:NGISTLM:1:1101:10221:1313 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AGTGCCTAT ++ +FFFFFFFFF diff --git a/tests/seqdata/umi_shuffled.fq.gz b/tests/seqdata/umi_shuffled.fq.gz new file mode 100644 index 0000000000000000000000000000000000000000..70b757f249541f51e71a24115a9875d4b08fb1ac GIT binary patch literal 231 zcmV^79SXZl<$=o|>^SAw hE-}lM39a`Z*`4lNWegoot-xQbw0062maJm2h literal 0 HcmV?d00001 From 5da1b9ffa3ce8d0b826cadaba21f0121e8d75a65 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 10 Jul 2023 20:04:48 +0200 Subject: [PATCH 38/72] Replace tempdir crate with assert_fs. 
--- Cargo.lock | 193 ++++++++++++++++++++++++++++++++++++++++--------- Cargo.toml | 8 +- src/file_io.rs | 23 ++---- 3 files changed, 172 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8f7fd6a..3721d6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + [[package]] name = "aho-corasick" version = "1.0.2" @@ -96,6 +105,21 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "assert_fs" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f070617a68e5c2ed5d06ee8dd620ee18fb72b99f6c094bed34cf8ab07c875b48" +dependencies = [ + "anstyle", + "doc-comment", + "globwalk", + "predicates", + "predicates-core", + "predicates-tree", + "tempfile", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -273,36 +297,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" -[[package]] -name = "concolor" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b946244a988c390a94667ae0e3958411fa40cc46ea496a929b263d883f5f9c3" -dependencies = [ - "bitflags 1.3.2", - "concolor-query", - "is-terminal", -] - -[[package]] -name = "concolor-clap" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435ff0007a3bb04099fe1beedc6b76e7dd5340c90b168008ac0d7e87441de1bf" -dependencies = [ - "clap", - "concolor", -] - -[[package]] -name = "concolor-query" -version = "0.3.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" -dependencies = [ - "windows-sys 0.45.0", -] - [[package]] name = "console" version = "0.15.7" @@ -477,6 +471,21 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "fxhash" version = "0.2.1" @@ -509,6 +518,30 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "globset" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc" +dependencies = [ + "aho-corasick 0.7.20", + "bstr", + "fnv", + "log", + "regex", +] + +[[package]] +name = "globwalk" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93e3af942408868f6934a7b85134a3230832b9977cf66125df2f9edcfce4ddcc" +dependencies = [ + "bitflags 1.3.2", + "ignore", + "walkdir", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -536,6 +569,23 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" +[[package]] +name = "ignore" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492" +dependencies = [ + "globset", + "lazy_static", + "log", + "memchr", + "regex", + "same-file", + "thread_local", + "walkdir", + "winapi-util", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -644,6 
+694,12 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" +[[package]] +name = "log" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" + [[package]] name = "matrixmultiply" version = "0.3.7" @@ -729,6 +785,12 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "num-complex" version = "0.4.3" @@ -832,8 +894,11 @@ checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" dependencies = [ "anstyle", "difflib", + "float-cmp", "itertools", + "normalize-line-endings", "predicates-core", + "regex", ] [[package]] @@ -955,7 +1020,7 @@ version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" dependencies = [ - "aho-corasick", + "aho-corasick 1.0.2", "memchr", "regex-automata", "regex-syntax", @@ -967,7 +1032,7 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9aaecc05d5c4b5f7da074b9a0d1a0867e71fd36e7fc0482d8bcfe8e8fc56290" dependencies = [ - "aho-corasick", + "aho-corasick 1.0.2", "memchr", "regex-syntax", ] @@ -1026,6 +1091,15 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "semver" version = 
"0.1.20" @@ -1183,6 +1257,16 @@ dependencies = [ "syn 2.0.23", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "triple_accel" version = "0.4.0" @@ -1201,9 +1285,9 @@ version = "0.2.0" dependencies = [ "anyhow", "assert_cmd", + "assert_fs", "bio", "clap", - "concolor-clap", "dialoguer", "file-format", "flate2", @@ -1211,8 +1295,8 @@ dependencies = [ "itertools", "lazy_static", "owo-colors", + "predicates", "regex", - "tempfile", ] [[package]] @@ -1263,12 +1347,53 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.45.0" diff --git a/Cargo.toml b/Cargo.toml index 037233a..b9a50b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,9 @@ file-format = "0.7.0" anyhow = "1.0.71" dialoguer = "0.10.4" regex = "1.8.1" -tempfile = "3.6.0" -assert_cmd = "2.0.11" owo-colors = "3.5.0" -concolor-clap = "0.1.0" + +[dev-dependencies] +assert_cmd = "2.0.11" +assert_fs = "1.0.13" +predicates = "3.0.3" diff --git a/src/file_io.rs b/src/file_io.rs index 7b30436..84f504d 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -179,20 +179,13 @@ pub fn append_umi_to_path(path: &Path) -> PathBuf { mod tests { use super::*; - use std::fs::File; - use std::io::Write; + use assert_fs::fixture::{NamedTempFile, TempDir}; use std::path::PathBuf; - use tempfile::TempDir; - fn create_mock_file() -> (TempDir, PathBuf) { - let temp_dir = tempfile::tempdir().expect("Failed to create temporary directory"); - let file_path = temp_dir.path().join("mock.fq"); - - let mut file = File::create(&file_path).expect("Failed to create mock file"); - file.write_all(b"Mock file") - .expect("Failed to create mock file"); - - (temp_dir, file_path) + fn create_mock_file() -> (TempDir, NamedTempFile) { + let temp_dir = assert_fs::TempDir::new().expect("Failed to create temporary directory"); + let mock_file = NamedTempFile::new("ACTG.fq").unwrap(); + (temp_dir, mock_file) } #[test] @@ -243,10 +236,10 @@ mod tests { let (temp_dir, file_path) = create_mock_file(); let force = true; - let result = check_outputpath(file_path.clone(), &force); + let result = check_outputpath(file_path.path().to_path_buf(), &force); assert!(result.is_ok()); - assert_eq!(result.unwrap(), file_path); + assert_eq!(result.unwrap(), file_path.path().to_path_buf()); temp_dir .close() @@ -259,7 +252,7 @@ mod tests { let file_path = temp_dir.path().join("new_file"); let force = 
true; - let result = check_outputpath(file_path.clone(), &force); + let result = check_outputpath(file_path, &force); assert!(result.is_ok()); assert_eq!(result.unwrap(), temp_dir.path().join("new_file")); From e8bb6acded014e5307b5629529e8b030d4517b99 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 11 Jul 2023 12:35:49 +0200 Subject: [PATCH 39/72] Make colors of the header optional. --- Cargo.lock | 43 +++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 2 +- src/main.rs | 14 +++++++++++--- 3 files changed, 53 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3721d6b..3eb62c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -120,6 +120,17 @@ dependencies = [ "tempfile", ] +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -563,6 +574,15 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + [[package]] name = "hermit-abi" version = "0.3.2" @@ -624,7 +644,7 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.2", "libc", "windows-sys 0.48.0", ] @@ -635,11 +655,17 @@ version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "hermit-abi", + "hermit-abi 0.3.2", "rustix 
0.38.3", "windows-sys 0.48.0", ] +[[package]] +name = "is_ci" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" + [[package]] name = "itertools" version = "0.10.5" @@ -857,6 +883,9 @@ name = "owo-colors" version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +dependencies = [ + "supports-color", +] [[package]] name = "paste" @@ -1195,6 +1224,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "supports-color" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba6faf2ca7ee42fdd458f4347ae0a9bd6bcc445ad7cb57ad82b383f18870d6f" +dependencies = [ + "atty", + "is_ci", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index b9a50b9..9805db3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ file-format = "0.7.0" anyhow = "1.0.71" dialoguer = "0.10.4" regex = "1.8.1" -owo-colors = "3.5.0" +owo-colors = { version = "3.5", features = ["supports-colors"] } [dev-dependencies] assert_cmd = "2.0.11" diff --git a/src/main.rs b/src/main.rs index 2db2755..806135d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ extern crate core; use anyhow::Context; use clap::Parser; -use owo_colors::OwoColorize; +use owo_colors::{OwoColorize, Stream::Stdout}; use crate::auxiliary::timedrun; use crate::umi_external::OptsExternal; @@ -42,9 +42,17 @@ enum Subcommand { } fn main() { - println!("\n{}", LOGO.fg_rgb::<0xA7, 0xC9, 0x47>().bold()); + println!( + "\n{}", + LOGO.bold() + .if_supports_color(Stdout, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) + ); //println!("{}", WEB.fg_rgb::<0x49, 0x1F, 0x53>().italic()); - println!("{}", WEB.fg_rgb::<0x6F, 0x6F, 0x6F>().italic()); + println!( + "{}", + WEB.italic() + .if_supports_color(Stdout, |text| text.fg_rgb::<0x6F, 0x6F, 
0x6F>()) + ); let opt: Opt = Opt::parse(); timedrun("umi-transfer finished", || { let res = match opt.cmd { From 7ad8caf97cb64fe2b8f6b6e22be08a8e0d871223 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 11 Jul 2023 16:21:07 +0200 Subject: [PATCH 40/72] Refactor errors so they accept an optional path. --- src/file_io.rs | 25 +++++++++++++------------ src/umi_errors.rs | 35 ++++++++++++++++++++++++++++++----- src/umi_external.rs | 4 ++-- 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 84f504d..323a91b 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -54,7 +54,8 @@ impl OutputFile { // Read input file to Reader. Automatically scans if input is compressed with file-format crate. pub fn read_fastq(path: &PathBuf) -> Result>> { - fs::metadata(path).map_err(|_| anyhow!(RuntimeErrors::FileNotFoundError))?; + fs::metadata(path) + .map_err(|_e| anyhow!(RuntimeErrors::FileNotFoundError(Some(path.into()))))?; let format = FileFormat::from_file(path).context("Failed to determine file format")?; let reader: ReadFile = match format { @@ -75,31 +76,31 @@ pub fn read_fastq(path: &PathBuf) -> Result OutputFile { +pub fn output_file(name: PathBuf) -> Result { if let Some(extension) = name.extension() { if extension == "gz" { // File has gz extension, which has been enforced by check_outputpath() if -z was provided. - OutputFile::Gzip { + Ok(OutputFile::Gzip { read: std::fs::File::create(name.as_path()) .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) .map(bio::io::fastq::Writer::new) - .unwrap(), - } + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteableError(Some(name))))?, + }) } else { // File has extension but not gz - OutputFile::Fastq { + Ok(OutputFile::Fastq { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) - .unwrap(), - } + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteableError(Some(name))))?, + }) } } else { //file has no extension. 
Assume plain-text. - OutputFile::Fastq { + Ok(OutputFile::Fastq { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) - .unwrap(), - } + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteableError(Some(name))))?, + }) } } @@ -140,7 +141,7 @@ pub fn check_outputpath(path: PathBuf, force: &bool) -> Result { println!("File will be overwritten."); Ok(path) } else { - Err(anyhow!(RuntimeErrors::FileExistsError)) + Err(anyhow!(RuntimeErrors::FileExistsError(Some(path)))) } } else { Ok(path) diff --git a/src/umi_errors.rs b/src/umi_errors.rs index 90bfe15..9f67bca 100644 --- a/src/umi_errors.rs +++ b/src/umi_errors.rs @@ -1,8 +1,11 @@ +use std::path::PathBuf; + #[derive(Debug)] pub enum RuntimeErrors { ReadIDMismatchError, - FileNotFoundError, - FileExistsError, + FileExistsError(Option), + FileNotFoundError(Option), + OutputNotWriteableError(Option), //GeneralError, } @@ -11,12 +14,34 @@ impl std::fmt::Display for RuntimeErrors { match self { Self::ReadIDMismatchError => write!( f, - "IDs of UMI and read records mismatch. Please provide sorted files!" + "IDs of UMI and read records mismatch. Please provide sorted files as input!" 
+ ), + Self::FileExistsError(None) => { + write!(f, "Output file exists, but must not be overwritten.") + } + Self::FileExistsError(Some(path)) => write!( + f, + "Output file {} exists, but must not be overwritten.", + path.to_string_lossy() ), - Self::FileNotFoundError => { + Self::FileNotFoundError(None) => { write!(f, "Specified file does not exist or is not readable!") } - Self::FileExistsError => write!(f, "Output file exists, but must not be overwritten."), + Self::FileNotFoundError(Some(path)) => { + write!( + f, + "{} does not exist or is not readable!", + path.to_string_lossy() + ) + } + Self::OutputNotWriteableError(None) => { + write!(f, "Output file is missing or not writeable.") + } + Self::OutputNotWriteableError(Some(path)) => write!( + f, + "Output file {} is missing or not writeable.", + path.to_string_lossy() + ), //Self::GeneralError => write!(f, "Encountered an error."), } } diff --git a/src/umi_external.rs b/src/umi_external.rs index 848e54b..0b0cac1 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -123,8 +123,8 @@ pub fn run(args: OptsExternal) -> Result { println!("Output 1 will be saved to: {}", output1.to_string_lossy()); println!("Output 2 will be saved to: {}", output2.to_string_lossy()); - let mut write_file_r1 = file_io::output_file(output1); - let mut write_file_r2 = file_io::output_file(output2); + let mut write_file_r1 = file_io::output_file(output1)?; + let mut write_file_r2 = file_io::output_file(output2)?; // Record counter let mut counter: i32 = 0; From 32b348a542c1c7be571ff49c5f1279b59f8df986 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 11 Jul 2023 17:15:09 +0200 Subject: [PATCH 41/72] Consistently replace OutputFile with Result in all functions. 
--- src/file_io.rs | 13 +++++++++---- src/umi_errors.rs | 10 +++------- src/umi_external.rs | 4 ++-- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 323a91b..78bfc3a 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -38,15 +38,20 @@ pub enum OutputFile { // Implement write for OutputFile enum impl OutputFile { - pub fn write(self, header: &str, desc: Option<&str>, s: bio::io::fastq::Record) -> OutputFile { + pub fn write( + self, + header: &str, + desc: Option<&str>, + s: bio::io::fastq::Record, + ) -> Result { match self { OutputFile::Fastq { mut read } => { read.write(header, desc, s.seq(), s.qual()).unwrap(); - OutputFile::Fastq { read } + Ok(OutputFile::Fastq { read }) } OutputFile::Gzip { mut read } => { read.write(header, desc, s.seq(), s.qual()).unwrap(); - OutputFile::Gzip { read } + Ok(OutputFile::Gzip { read }) } } } @@ -111,7 +116,7 @@ pub fn write_to_file( umi: &[u8], umi_sep: Option<&String>, edit_nr: Option, -) -> OutputFile { +) -> Result { let s = input; let delim = umi_sep.as_ref().map(|s| s.as_str()).unwrap_or(":"); // the delimiter for the UMI if let Some(number) = edit_nr { diff --git a/src/umi_errors.rs b/src/umi_errors.rs index 9f67bca..eb4193c 100644 --- a/src/umi_errors.rs +++ b/src/umi_errors.rs @@ -22,17 +22,13 @@ impl std::fmt::Display for RuntimeErrors { Self::FileExistsError(Some(path)) => write!( f, "Output file {} exists, but must not be overwritten.", - path.to_string_lossy() + path.display() ), Self::FileNotFoundError(None) => { write!(f, "Specified file does not exist or is not readable!") } Self::FileNotFoundError(Some(path)) => { - write!( - f, - "{} does not exist or is not readable!", - path.to_string_lossy() - ) + write!(f, "{} does not exist or is not readable!", path.display()) } Self::OutputNotWriteableError(None) => { write!(f, "Output file is missing or not writeable.") @@ -40,7 +36,7 @@ impl std::fmt::Display for RuntimeErrors { 
Self::OutputNotWriteableError(Some(path)) => write!( f, "Output file {} is missing or not writeable.", - path.to_string_lossy() + path.display() ), //Self::GeneralError => write!(f, "Encountered an error."), } diff --git a/src/umi_external.rs b/src/umi_external.rs index 0b0cac1..a68d418 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -149,7 +149,7 @@ pub fn run(args: OptsExternal) -> Result { ru_rec.seq(), args.delim.as_ref(), read_nr, - ); + )?; } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); } @@ -163,7 +163,7 @@ pub fn run(args: OptsExternal) -> Result { ru_rec.seq(), args.delim.as_ref(), read_nr, - ); + )?; } else { return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); } From 4a8e0da9c57349defc7893de132a1ea21c1a9475 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 11 Jul 2023 18:17:10 +0200 Subject: [PATCH 42/72] Add a new ReadWriteError to RuntimeErrors. --- src/file_io.rs | 16 ++++++++-------- src/umi_errors.rs | 16 +++++++++------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 78bfc3a..e24f549 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -45,14 +45,14 @@ impl OutputFile { s: bio::io::fastq::Record, ) -> Result { match self { - OutputFile::Fastq { mut read } => { - read.write(header, desc, s.seq(), s.qual()).unwrap(); - Ok(OutputFile::Fastq { read }) - } - OutputFile::Gzip { mut read } => { - read.write(header, desc, s.seq(), s.qual()).unwrap(); - Ok(OutputFile::Gzip { read }) - } + OutputFile::Fastq { mut read } => match read.write(header, desc, s.seq(), s.qual()) { + Ok(_) => Ok(OutputFile::Fastq { read }), + Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), + }, + OutputFile::Gzip { mut read } => match read.write(header, desc, s.seq(), s.qual()) { + Ok(_) => Ok(OutputFile::Gzip { read }), + Err(_) => Err(anyhow!(RuntimeErrors::ReadWriteError(s))), + }, } } } diff --git a/src/umi_errors.rs b/src/umi_errors.rs index eb4193c..85f4a40 100644 --- 
a/src/umi_errors.rs +++ b/src/umi_errors.rs @@ -2,20 +2,16 @@ use std::path::PathBuf; #[derive(Debug)] pub enum RuntimeErrors { - ReadIDMismatchError, FileExistsError(Option), FileNotFoundError(Option), OutputNotWriteableError(Option), - //GeneralError, + ReadIDMismatchError, + ReadWriteError(bio::io::fastq::Record), } impl std::fmt::Display for RuntimeErrors { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::ReadIDMismatchError => write!( - f, - "IDs of UMI and read records mismatch. Please provide sorted files as input!" - ), Self::FileExistsError(None) => { write!(f, "Output file exists, but must not be overwritten.") } @@ -38,7 +34,13 @@ impl std::fmt::Display for RuntimeErrors { "Output file {} is missing or not writeable.", path.display() ), - //Self::GeneralError => write!(f, "Encountered an error."), + Self::ReadIDMismatchError => write!( + f, + "IDs of UMI and read records mismatch. Please provide sorted files as input!" + ), + Self::ReadWriteError(record) => { + write!(f, "Failure to write read {} to file.", record.id()) + } } } } From 9f99ad31075baf824d58e5b762d34b40c44dfd41 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 11 Jul 2023 19:24:14 +0200 Subject: [PATCH 43/72] Write error messages to Stderr instead of Stdout and color with owo-colorize. 
--- src/main.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index 806135d..1e42e88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,8 @@ extern crate core; use anyhow::Context; use clap::Parser; -use owo_colors::{OwoColorize, Stream::Stdout}; +use owo_colors::{OwoColorize, Stream::Stderr, Stream::Stdout}; +use std::process; use crate::auxiliary::timedrun; use crate::umi_external::OptsExternal; @@ -44,14 +45,12 @@ enum Subcommand { fn main() { println!( "\n{}", - LOGO.bold() - .if_supports_color(Stdout, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) + LOGO.if_supports_color(Stdout, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) ); //println!("{}", WEB.fg_rgb::<0x49, 0x1F, 0x53>().italic()); println!( "{}", - WEB.italic() - .if_supports_color(Stdout, |text| text.fg_rgb::<0x6F, 0x6F, 0x6F>()) + WEB.if_supports_color(Stdout, |text| text.fg_rgb::<0x6F, 0x6F, 0x6F>()) ); let opt: Opt = Opt::parse(); timedrun("umi-transfer finished", || { @@ -62,7 +61,11 @@ fn main() { }; if let Err(v) = res { - println!("{:?}", v) + eprintln!( + "{:?}", + v.if_supports_color(Stderr, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) + ); + process::exit(1); } }); } From 7bf84d4846cf20aeb8aaf52ef5785eed5d2b24ed Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 11 Jul 2023 21:44:31 +0200 Subject: [PATCH 44/72] Created a custom error parsing for clap:Error enum that allows custom styling of particular errors. 
--- src/main.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1e42e88..a6f0e1d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,9 @@ extern crate core; -use anyhow::Context; +use anyhow::{anyhow, Context}; use clap::Parser; use owo_colors::{OwoColorize, Stream::Stderr, Stream::Stdout}; + use std::process; use crate::auxiliary::timedrun; @@ -52,7 +53,23 @@ fn main() { "{}", WEB.if_supports_color(Stdout, |text| text.fg_rgb::<0x6F, 0x6F, 0x6F>()) ); - let opt: Opt = Opt::parse(); + + // for custom styles of clap parsing errors and help message + let opt: Opt = Opt::try_parse().unwrap_or_else(|err| { + match err.kind() { + clap::error::ErrorKind::DisplayHelp => { + err.print().unwrap(); + } + clap::error::ErrorKind::MissingRequiredArgument => { + eprintln!("Error: {} is required", err); + } + _ => { + err.print().unwrap(); + } + }; + process::exit(1); + }); + timedrun("umi-transfer finished", || { let res = match opt.cmd { Subcommand::External(arg) => { @@ -60,10 +77,10 @@ fn main() { } //Subcommand::Internal(arg) => umi_internal::run(arg), }; - if let Err(v) = res { + if let Err(err) = res { eprintln!( "{:?}", - v.if_supports_color(Stderr, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) + err.if_supports_color(Stderr, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) ); process::exit(1); } From 41c4aefed8cd1469b43a3e2fa724e5ac850f5ec3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 16:45:15 +0200 Subject: [PATCH 45/72] Again error styling. 
--- src/main.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/main.rs b/src/main.rs index a6f0e1d..eab452e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ extern crate core; -use anyhow::{anyhow, Context}; +use anyhow::Context; use clap::Parser; use owo_colors::{OwoColorize, Stream::Stderr, Stream::Stdout}; @@ -27,9 +27,11 @@ https://github.com/SciLifeLab/umi-transfer #[clap( version = "0.2.0", author = "Written by Judit Hohenthal, Matthias Zepper & Johannes Alneberg", - about = "A tool for transferring Unique Molecular Identifiers (UMIs).\n\nMost tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs. You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files.\n\n" + about = "A tool for transferring Unique Molecular Identifiers (UMIs).", + long_about = "Most tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs. You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files." )] - +#[command(author, version, about, long_about = None)] +#[command(propagate_version = true)] pub struct Opt { #[clap(subcommand)] cmd: Subcommand, @@ -57,14 +59,19 @@ fn main() { // for custom styles of clap parsing errors and help message let opt: Opt = Opt::try_parse().unwrap_or_else(|err| { match err.kind() { - clap::error::ErrorKind::DisplayHelp => { + // rust render as Clap would by default. 
+ clap::error::ErrorKind::DisplayHelp + | clap::error::ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand + | clap::error::ErrorKind::DisplayVersion => { err.print().unwrap(); } - clap::error::ErrorKind::MissingRequiredArgument => { - eprintln!("Error: {} is required", err); - } _ => { - err.print().unwrap(); + // color green for consistency with Runtime errors. Sadly no styled formatting yet. + eprintln!( + "{}", + err.render() + .if_supports_color(Stderr, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) + ) } }; process::exit(1); From 628d98d16dbf59dad59e044a41186b20bd1dd569 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 19:26:51 +0200 Subject: [PATCH 46/72] Adapt the append_umi_to_path() function to additional edge cases with hidden directories and hidden files. --- src/file_io.rs | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index e24f549..c726a7c 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -3,7 +3,7 @@ use anyhow::{anyhow, Context, Result}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use regex::Regex; -use std::{fs, path::Path, path::PathBuf}; +use std::{borrow::Cow, fs, path::Path, path::PathBuf}; // Defining types for simplicity type File = std::fs::File; @@ -176,8 +176,22 @@ pub fn rectify_extension(mut path: PathBuf, compress: &bool) -> Result pub fn append_umi_to_path(path: &Path) -> PathBuf { let path_str = path.as_os_str().to_string_lossy(); - let re = Regex::new(r"^(?P\.*[^\.]+)\.(?P.*)$").unwrap(); - let new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); + + let new_path_str: Cow<'_, str>; + + if path_str.contains('\\') || path_str.contains('/') { + // Path group: Match everything until a forward or backward slash not followed by a forward or backward slash non-greedy (*?) 
+ // Stem group: Match literal dot zero or one time, and everything thereafter that is not a dot, yet followed by a literal dot. + // Extension group: Now match whatever is still left until the end $. + let re = + Regex::new(r"(?P^.*(?:\\|/))[^/\\]*?(?P\.?[^\.]+)\.(?P.*)$") + .unwrap(); + new_path_str = re.replace(&path_str, "${path}${stem}_with_UMIs.${extension}"); + } else { + // Simplified regex for the cases when the file name is given without any preceding path. + let re = Regex::new(r"(?P^\.?[^\.]+)\.(?P.*)$").unwrap(); + new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); + } PathBuf::from(new_path_str.to_string()) } @@ -196,17 +210,44 @@ mod tests { #[test] fn test_correctly_derive_output_name() { + // plain file with simple extension let p = PathBuf::from("test.fastq"); let result = append_umi_to_path(&p); assert_eq!(result, PathBuf::from("test_with_UMIs.fastq")); + // plain file with multiple extensions let p = PathBuf::from("test.fastq.gz"); let result = append_umi_to_path(&p); assert_eq!(result, PathBuf::from("test_with_UMIs.fastq.gz")); + // path and file with multiple extensions let p = PathBuf::from("/some/path/test.fastq.gz"); let result = append_umi_to_path(&p); assert_eq!(result, PathBuf::from("/some/path/test_with_UMIs.fastq.gz")); + + // path with hidden dir and file with multiple extensions + let p = PathBuf::from("/some/.hidden/path/test.fastq.gz"); + let result = append_umi_to_path(&p); + assert_eq!( + result, + PathBuf::from("/some/.hidden/path/test_with_UMIs.fastq.gz") + ); + + // path with hidden dir and hidden file with multiple extensions + let p = PathBuf::from("/some/.hidden/path/.test.fastq.gz"); + let result = append_umi_to_path(&p); + assert_eq!( + result, + PathBuf::from("/some/.hidden/path/.test_with_UMIs.fastq.gz") + ); + + // relative path with hidden dir and hidden file with multiple extensions + let p = PathBuf::from("./some/.hidden/path/.test.fastq.gz"); + let result = append_umi_to_path(&p); + assert_eq!( 
+ result, + PathBuf::from("./some/.hidden/path/.test_with_UMIs.fastq.gz") + ); } #[test] From 9c800f4263bea033ddeb757e043baffddc5524fb Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 19:47:37 +0200 Subject: [PATCH 47/72] Write the first two integration tests. --- src/main.rs | 5 ++- tests/integration_tests.rs | 82 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 tests/integration_tests.rs diff --git a/src/main.rs b/src/main.rs index eab452e..f716f1e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -64,6 +64,7 @@ fn main() { | clap::error::ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand | clap::error::ErrorKind::DisplayVersion => { err.print().unwrap(); + process::exit(0); } _ => { // color green for consistency with Runtime errors. Sadly no styled formatting yet. @@ -71,10 +72,10 @@ fn main() { "{}", err.render() .if_supports_color(Stderr, |text| text.fg_rgb::<0xA7, 0xC9, 0x47>()) - ) + ); + process::exit(1); } }; - process::exit(1); }); timedrun("umi-transfer finished", || { diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs new file mode 100644 index 0000000..2b0d469 --- /dev/null +++ b/tests/integration_tests.rs @@ -0,0 +1,82 @@ +use assert_cmd::prelude::*; +use assert_fs::fixture::{NamedTempFile, TempDir}; +use assert_fs::prelude::*; +use predicates::prelude::*; +use std::path::PathBuf; +use std::process::Command; + +#[derive()] +struct TestFiles { + // Struct to hold the paths to test files. 
+ read1: PathBuf, + read1_gz: PathBuf, + read2: PathBuf, + read2_gz: PathBuf, + umi: PathBuf, + umi_gz: PathBuf, + umi_shuffle: PathBuf, + umi_shuffle_gz: PathBuf, + existing_output: NamedTempFile, +} + +fn setup_integration_test() -> (Command, TempDir, TestFiles) { + // Get the name of the binary (umi-transfer) + let cmd = Command::cargo_bin(assert_cmd::crate_name!()) + .expect("Failed to pull binary name from Cargo.toml at compile time."); + + let temp_dir = assert_fs::TempDir::new().expect("Failed to create temporary directory"); + + temp_dir + .copy_from( + std::env::current_dir() + .expect("Failed to get directory") + .join("./tests/seqdata"), + &["*.fq", "*.gz"], + ) + .expect("Failed to copy test data to temporary directory."); + + let test_files = TestFiles { + read1: temp_dir.path().join("read1.fq"), + read1_gz: temp_dir.path().join("read1.fq.gz"), + read2: temp_dir.path().join("read2.fq"), + read2_gz: temp_dir.path().join("read2.fq.gz"), + umi: temp_dir.path().join("umi.fq"), + umi_gz: temp_dir.path().join("umi.fq.gz"), + umi_shuffle: temp_dir.path().join("umi_shuffle.fq"), + umi_shuffle_gz: temp_dir.path().join("umi_shuffle.fq.gz"), + existing_output: NamedTempFile::new("ACTG.fq").unwrap(), + }; + + return (cmd, temp_dir, test_files); +} + +// Define the integration tests +#[test] +fn main_without_arguments_prints_help() { + let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); + cmd.assert() + .success() + .stderr(predicate::str::contains("Usage:")) + .stderr(predicate::str::contains("Commands:")) + .stderr(predicate::str::contains("Options:")); +} + +#[test] +fn external_with_minimal_arguments() { + let (mut cmd, temp_dir, test_files) = setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + 
.stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished after")); + + temp_dir.close().unwrap(); +} From 059354c2035e8d8bdddb9d330cff69183ba354e4 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 20:01:37 +0200 Subject: [PATCH 48/72] Add two more integration tests. --- tests/integration_tests.rs | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index 2b0d469..010a853 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -62,7 +62,23 @@ fn main_without_arguments_prints_help() { } #[test] -fn external_with_minimal_arguments() { +fn external_fails_without_arguments() { + let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); + + cmd.arg("external"); + + cmd.assert() + .failure() + .stderr(predicate::str::contains( + "error: the following required arguments were not provided", + )) + .stderr(predicate::str::contains("--in ")) + .stderr(predicate::str::contains("--in2 ")) + .stderr(predicate::str::contains("--umi ")); +} + +#[test] +fn external_with_minimal_arguments_plain() { let (mut cmd, temp_dir, test_files) = setup_integration_test(); cmd.arg("external") .arg("--in") @@ -80,3 +96,23 @@ fn external_with_minimal_arguments() { temp_dir.close().unwrap(); } + +#[test] +fn external_with_minimal_arguments_compressed() { + let (mut cmd, temp_dir, test_files) = setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + .stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished after")); + + temp_dir.close().unwrap(); +} From 63fb9df1ca8757d485f76f14f62b2f0b2bd2cb75 Mon Sep 17 
00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 20:15:03 +0200 Subject: [PATCH 49/72] Refactor integration tests for clearer structure. --- tests/auxiliary.rs | 50 ++++++++++++ tests/integration_tests.rs | 118 ---------------------------- tests/integration_tests_external.rs | 62 +++++++++++++++ tests/integration_tests_main.rs | 17 ++++ 4 files changed, 129 insertions(+), 118 deletions(-) create mode 100644 tests/auxiliary.rs delete mode 100644 tests/integration_tests.rs create mode 100644 tests/integration_tests_external.rs create mode 100644 tests/integration_tests_main.rs diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs new file mode 100644 index 0000000..07395a5 --- /dev/null +++ b/tests/auxiliary.rs @@ -0,0 +1,50 @@ +use assert_cmd::prelude::*; +use assert_fs::fixture::{NamedTempFile, TempDir}; +use assert_fs::prelude::*; +use std::path::PathBuf; +use std::process::Command; + +#[derive()] +pub struct TestFiles { + // Struct to hold the paths to test files. + pub read1: PathBuf, + pub read1_gz: PathBuf, + pub read2: PathBuf, + pub read2_gz: PathBuf, + pub umi: PathBuf, + pub umi_gz: PathBuf, + pub umi_shuffle: PathBuf, + pub umi_shuffle_gz: PathBuf, + pub existing_output: NamedTempFile, +} + +pub fn setup_integration_test() -> (Command, TempDir, TestFiles) { + // Get the name of the binary (umi-transfer) + let cmd = Command::cargo_bin(assert_cmd::crate_name!()) + .expect("Failed to pull binary name from Cargo.toml at compile time."); + + let temp_dir = assert_fs::TempDir::new().expect("Failed to create temporary directory"); + + temp_dir + .copy_from( + std::env::current_dir() + .expect("Failed to get directory") + .join("./tests/seqdata"), + &["*.fq", "*.gz"], + ) + .expect("Failed to copy test data to temporary directory."); + + let test_files = TestFiles { + read1: temp_dir.path().join("read1.fq"), + read1_gz: temp_dir.path().join("read1.fq.gz"), + read2: temp_dir.path().join("read2.fq"), + read2_gz: temp_dir.path().join("read2.fq.gz"), + umi: 
temp_dir.path().join("umi.fq"), + umi_gz: temp_dir.path().join("umi.fq.gz"), + umi_shuffle: temp_dir.path().join("umi_shuffle.fq"), + umi_shuffle_gz: temp_dir.path().join("umi_shuffle.fq.gz"), + existing_output: NamedTempFile::new("ACTG.fq").unwrap(), + }; + + return (cmd, temp_dir, test_files); +} diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs deleted file mode 100644 index 010a853..0000000 --- a/tests/integration_tests.rs +++ /dev/null @@ -1,118 +0,0 @@ -use assert_cmd::prelude::*; -use assert_fs::fixture::{NamedTempFile, TempDir}; -use assert_fs::prelude::*; -use predicates::prelude::*; -use std::path::PathBuf; -use std::process::Command; - -#[derive()] -struct TestFiles { - // Struct to hold the paths to test files. - read1: PathBuf, - read1_gz: PathBuf, - read2: PathBuf, - read2_gz: PathBuf, - umi: PathBuf, - umi_gz: PathBuf, - umi_shuffle: PathBuf, - umi_shuffle_gz: PathBuf, - existing_output: NamedTempFile, -} - -fn setup_integration_test() -> (Command, TempDir, TestFiles) { - // Get the name of the binary (umi-transfer) - let cmd = Command::cargo_bin(assert_cmd::crate_name!()) - .expect("Failed to pull binary name from Cargo.toml at compile time."); - - let temp_dir = assert_fs::TempDir::new().expect("Failed to create temporary directory"); - - temp_dir - .copy_from( - std::env::current_dir() - .expect("Failed to get directory") - .join("./tests/seqdata"), - &["*.fq", "*.gz"], - ) - .expect("Failed to copy test data to temporary directory."); - - let test_files = TestFiles { - read1: temp_dir.path().join("read1.fq"), - read1_gz: temp_dir.path().join("read1.fq.gz"), - read2: temp_dir.path().join("read2.fq"), - read2_gz: temp_dir.path().join("read2.fq.gz"), - umi: temp_dir.path().join("umi.fq"), - umi_gz: temp_dir.path().join("umi.fq.gz"), - umi_shuffle: temp_dir.path().join("umi_shuffle.fq"), - umi_shuffle_gz: temp_dir.path().join("umi_shuffle.fq.gz"), - existing_output: NamedTempFile::new("ACTG.fq").unwrap(), - }; - - return (cmd, 
temp_dir, test_files); -} - -// Define the integration tests -#[test] -fn main_without_arguments_prints_help() { - let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); - cmd.assert() - .success() - .stderr(predicate::str::contains("Usage:")) - .stderr(predicate::str::contains("Commands:")) - .stderr(predicate::str::contains("Options:")); -} - -#[test] -fn external_fails_without_arguments() { - let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); - - cmd.arg("external"); - - cmd.assert() - .failure() - .stderr(predicate::str::contains( - "error: the following required arguments were not provided", - )) - .stderr(predicate::str::contains("--in ")) - .stderr(predicate::str::contains("--in2 ")) - .stderr(predicate::str::contains("--umi ")); -} - -#[test] -fn external_with_minimal_arguments_plain() { - let (mut cmd, temp_dir, test_files) = setup_integration_test(); - cmd.arg("external") - .arg("--in") - .arg(test_files.read1) - .arg("--in2") - .arg(test_files.read2) - .arg("--umi") - .arg(test_files.umi); - - cmd.assert() - .success() - .stdout(predicate::str::contains("Transferring UMIs to records")) - .stdout(predicate::str::contains("Processed 10 records")) - .stdout(predicate::str::contains("umi-transfer finished after")); - - temp_dir.close().unwrap(); -} - -#[test] -fn external_with_minimal_arguments_compressed() { - let (mut cmd, temp_dir, test_files) = setup_integration_test(); - cmd.arg("external") - .arg("--in") - .arg(test_files.read1_gz) - .arg("--in2") - .arg(test_files.read2_gz) - .arg("--umi") - .arg(test_files.umi_gz); - - cmd.assert() - .success() - .stdout(predicate::str::contains("Transferring UMIs to records")) - .stdout(predicate::str::contains("Processed 10 records")) - .stdout(predicate::str::contains("umi-transfer finished after")); - - temp_dir.close().unwrap(); -} diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs new file mode 100644 index 0000000..fcf9250 --- /dev/null +++ 
b/tests/integration_tests_external.rs @@ -0,0 +1,62 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::process::Command; + +#[path = "auxiliary.rs"] +mod auxiliary; + +#[test] +fn external_fails_without_arguments() { + let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); + + cmd.arg("external"); + + cmd.assert() + .failure() + .stderr(predicate::str::contains( + "error: the following required arguments were not provided", + )) + .stderr(predicate::str::contains("--in ")) + .stderr(predicate::str::contains("--in2 ")) + .stderr(predicate::str::contains("--umi ")); +} + +#[test] +fn external_with_minimal_arguments_plain() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + .stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished after")); + + temp_dir.close().unwrap(); +} + +#[test] +fn external_with_minimal_arguments_compressed() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + .stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished after")); + + temp_dir.close().unwrap(); +} diff --git a/tests/integration_tests_main.rs b/tests/integration_tests_main.rs new file mode 100644 index 0000000..b6a322a --- /dev/null +++ b/tests/integration_tests_main.rs @@ -0,0 +1,17 @@ +use assert_cmd::prelude::*; +use predicates::prelude::*; +use std::process::Command; + +#[path = 
"auxiliary.rs"] +mod auxiliary; + +// Define the integration tests +#[test] +fn main_without_arguments_prints_help() { + let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); + cmd.assert() + .success() + .stderr(predicate::str::contains("Usage:")) + .stderr(predicate::str::contains("Commands:")) + .stderr(predicate::str::contains("Options:")); +} From c3db2464186ed236093d0cd8b3f32523e3306e7d Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 21:03:11 +0200 Subject: [PATCH 50/72] Added tests for gz file extension. --- tests/auxiliary.rs | 16 +++++- tests/integration_tests_external.rs | 88 ++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index 07395a5..fcb3e24 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -4,7 +4,10 @@ use assert_fs::prelude::*; use std::path::PathBuf; use std::process::Command; +// since those are just needed for the tests, I didn't put it in src. Therefore, using this module is not detected and dead_code warnings issued. + #[derive()] +#[allow(dead_code)] pub struct TestFiles { // Struct to hold the paths to test files. 
pub read1: PathBuf, @@ -16,8 +19,13 @@ pub struct TestFiles { pub umi_shuffle: PathBuf, pub umi_shuffle_gz: PathBuf, pub existing_output: NamedTempFile, + pub new_output_read1: PathBuf, + pub new_output_read1_gz: PathBuf, + pub new_output_read2: PathBuf, + pub new_output_read2_gz: PathBuf, } +#[allow(dead_code)] pub fn setup_integration_test() -> (Command, TempDir, TestFiles) { // Get the name of the binary (umi-transfer) let cmd = Command::cargo_bin(assert_cmd::crate_name!()) @@ -41,9 +49,13 @@ pub fn setup_integration_test() -> (Command, TempDir, TestFiles) { read2_gz: temp_dir.path().join("read2.fq.gz"), umi: temp_dir.path().join("umi.fq"), umi_gz: temp_dir.path().join("umi.fq.gz"), - umi_shuffle: temp_dir.path().join("umi_shuffle.fq"), - umi_shuffle_gz: temp_dir.path().join("umi_shuffle.fq.gz"), + umi_shuffle: temp_dir.path().join("umi_shuffled.fq"), + umi_shuffle_gz: temp_dir.path().join("umi_shuffled.fq.gz"), existing_output: NamedTempFile::new("ACTG.fq").unwrap(), + new_output_read1: temp_dir.path().join("read1_out.fq"), + new_output_read1_gz: temp_dir.path().join("read1_out.fq.gz"), + new_output_read2: temp_dir.path().join("read2_out.fq"), + new_output_read2_gz: temp_dir.path().join("read2_out.fq.gz"), }; return (cmd, temp_dir, test_files); diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs index fcf9250..1cadc9e 100644 --- a/tests/integration_tests_external.rs +++ b/tests/integration_tests_external.rs @@ -1,4 +1,5 @@ use assert_cmd::prelude::*; +use assert_fs::prelude::*; use predicates::prelude::*; use std::process::Command; @@ -38,6 +39,14 @@ fn external_with_minimal_arguments_plain() { .stdout(predicate::str::contains("Processed 10 records")) .stdout(predicate::str::contains("umi-transfer finished after")); + temp_dir + .child("read1_with_UMIs.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq") + .assert(predicate::path::exists()); + temp_dir.close().unwrap(); } @@ -50,7 +59,8 @@ fn 
external_with_minimal_arguments_compressed() { .arg("--in2") .arg(test_files.read2_gz) .arg("--umi") - .arg(test_files.umi_gz); + .arg(test_files.umi_gz) + .arg("--gzip"); cmd.assert() .success() @@ -58,5 +68,81 @@ fn external_with_minimal_arguments_compressed() { .stdout(predicate::str::contains("Processed 10 records")) .stdout(predicate::str::contains("umi-transfer finished after")); + temp_dir + .child("read1_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq.gz") + .assert(predicate::path::exists()); + + temp_dir.close().unwrap(); +} + +#[test] +fn external_with_output_gz_suffix_no_compression() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz) + .arg("--out") + .arg(test_files.new_output_read1_gz) + .arg("--out2") + .arg(test_files.new_output_read2_gz); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + .stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished after")); + + // Even though --out "read1_out.fq.gz" and --out2 "read2_out.fq.gz" are explicitly specified, + // the .gz suffix will be automatically removed if no -z / --gzip was chosen. + // This behavior was chosen because of FIFOs use. 
+ + temp_dir + .child("read1_out.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_out.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("read1_out.fq.gz") + .assert(predicate::path::missing()); + + temp_dir + .child("read2_out.fq.gz") + .assert(predicate::path::missing()); + + temp_dir.close().unwrap(); +} + +#[test] +fn external_fails_on_read_id_mismatch() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_shuffle_gz); + + cmd.assert() + .failure() + .stderr(predicate::str::contains("Failed to include the UMIs")) + .stderr(predicate::str::contains( + "IDs of UMI and read records mismatch", + )) + .stderr(predicate::str::contains( + "Please provide sorted files as input", + )); + temp_dir.close().unwrap(); } From 77d18424cba5faded1ab49e02367120a33980f96 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Wed, 12 Jul 2023 22:00:31 +0200 Subject: [PATCH 51/72] Add more integration tests: Existing and non-existing output. --- tests/auxiliary.rs | 7 +- tests/integration_tests_external.rs | 150 +++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 7 deletions(-) diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index fcb3e24..78ffd5a 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -1,8 +1,7 @@ -use assert_cmd::prelude::*; +use assert_cmd::Command; use assert_fs::fixture::{NamedTempFile, TempDir}; use assert_fs::prelude::*; use std::path::PathBuf; -use std::process::Command; // since those are just needed for the tests, I didn't put it in src. Therefore, using this module is not detected and dead_code warnings issued. 
@@ -18,7 +17,7 @@ pub struct TestFiles { pub umi_gz: PathBuf, pub umi_shuffle: PathBuf, pub umi_shuffle_gz: PathBuf, - pub existing_output: NamedTempFile, + pub nonexisting_output: PathBuf, pub new_output_read1: PathBuf, pub new_output_read1_gz: PathBuf, pub new_output_read2: PathBuf, @@ -51,7 +50,7 @@ pub fn setup_integration_test() -> (Command, TempDir, TestFiles) { umi_gz: temp_dir.path().join("umi.fq.gz"), umi_shuffle: temp_dir.path().join("umi_shuffled.fq"), umi_shuffle_gz: temp_dir.path().join("umi_shuffled.fq.gz"), - existing_output: NamedTempFile::new("ACTG.fq").unwrap(), + nonexisting_output: NamedTempFile::new("ACTG.fq").unwrap().path().to_path_buf(), //goes out of scope too early new_output_read1: temp_dir.path().join("read1_out.fq"), new_output_read1_gz: temp_dir.path().join("read1_out.fq.gz"), new_output_read2: temp_dir.path().join("read2_out.fq"), diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs index 1cadc9e..6a3441b 100644 --- a/tests/integration_tests_external.rs +++ b/tests/integration_tests_external.rs @@ -1,7 +1,6 @@ -use assert_cmd::prelude::*; +use assert_cmd::Command; use assert_fs::prelude::*; use predicates::prelude::*; -use std::process::Command; #[path = "auxiliary.rs"] mod auxiliary; @@ -79,6 +78,50 @@ fn external_with_minimal_arguments_compressed() { temp_dir.close().unwrap(); } +#[test] +fn external_with_output_no_gz_suffix_compression() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz) + .arg("--out") + .arg(test_files.new_output_read1) + .arg("--out2") + .arg(test_files.new_output_read2) + .arg("--gzip"); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + .stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished 
after")); + + // Even though --out "read1_out.fq" and --out2 "read2_out.fq" are explicitly specified, + // a .gz suffix will be automatically added when compressed output is generated. + + temp_dir + .child("read1_out.fq") + .assert(predicate::path::missing()); + + temp_dir + .child("read2_out.fq") + .assert(predicate::path::missing()); + + temp_dir + .child("read1_out.fq.gz") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_out.fq.gz") + .assert(predicate::path::exists()); + + temp_dir.close().unwrap(); +} + #[test] fn external_with_output_gz_suffix_no_compression() { let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); @@ -102,7 +145,6 @@ fn external_with_output_gz_suffix_no_compression() { // Even though --out "read1_out.fq.gz" and --out2 "read2_out.fq.gz" are explicitly specified, // the .gz suffix will be automatically removed if no -z / --gzip was chosen. - // This behavior was chosen because of FIFOs use. temp_dir .child("read1_out.fq") @@ -123,6 +165,108 @@ fn external_with_output_gz_suffix_no_compression() { temp_dir.close().unwrap(); } +#[test] +fn external_fails_with_nonexisting_output_file() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz) + .arg("--out") + .arg(test_files.nonexisting_output) + .arg("--out2") + .arg(test_files.new_output_read2_gz); + + cmd.assert() + .failure() + .stderr(predicate::str::contains("Failed to include the UMIs")) + .stderr(predicate::str::contains("Caused by:")) + .stderr(predicate::str::contains("Output file")) + .stderr(predicate::str::contains("is missing or not writeable")); + + temp_dir + .child("read2_out.fq") + .assert(predicate::path::missing()); + + temp_dir.close().unwrap(); +} + +#[test] +fn external_fails_with_existing_output_file_and_no_force() { + let (mut cmd, temp_dir, test_files) = 
auxiliary::setup_integration_test(); + + // create an existing output file + temp_dir + .child("read1_out.fq") + .write_str("GCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATA") + .unwrap(); + + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz) + .arg("--out") + .arg(test_files.new_output_read1_gz) + .arg("--out2") + .arg(test_files.new_output_read2_gz) + .write_stdin("yes\n".as_bytes()); + + cmd.assert() + .failure() + .stderr(predicate::str::contains("Failed to include the UMIs")) + .stderr(predicate::str::contains("Caused by:")) + .stderr(predicate::str::contains("exists. Overwrite? (y/n)")) + .stderr(predicate::str::contains("Not a terminal")); + + temp_dir + .child("read2_out.fq") + .assert(predicate::path::missing()); + + temp_dir.close().unwrap(); +} + +#[test] +fn external_succeeds_with_existing_output_file_and_force() { + let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + + // create an existing output file + temp_dir + .child("read1_out.fq") + .write_str("GCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATA") + .unwrap(); + + cmd.arg("external") + .arg("--in") + .arg(test_files.read1_gz) + .arg("--in2") + .arg(test_files.read2_gz) + .arg("--umi") + .arg(test_files.umi_gz) + .arg("--out") + .arg(test_files.new_output_read1_gz) + .arg("--out2") + .arg(test_files.new_output_read2_gz) + .arg("--force"); + + cmd.assert() + .success() + .stdout(predicate::str::contains("Transferring UMIs to records")) + .stdout(predicate::str::contains("Processed 10 records")) + .stdout(predicate::str::contains("umi-transfer finished after")); + + temp_dir + .child("read2_out.fq") + .assert(predicate::path::missing()); + + temp_dir.close().unwrap(); +} + #[test] fn external_fails_on_read_id_mismatch() { let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); From 35efe00d2853461ad91c349f12daf38764098f4c Mon Sep 17 00:00:00 2001 From: Matthias Zepper 
Date: Wed, 12 Jul 2023 22:13:03 +0200 Subject: [PATCH 52/72] Remove a needless_late_init warning from clippy in file_io.rs --- src/file_io.rs | 14 +++++++------- tests/integration_tests_external.rs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index c726a7c..98c809e 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -3,7 +3,7 @@ use anyhow::{anyhow, Context, Result}; use dialoguer::{theme::ColorfulTheme, Confirm}; use file_format::FileFormat; use regex::Regex; -use std::{borrow::Cow, fs, path::Path, path::PathBuf}; +use std::{fs, path::Path, path::PathBuf}; // Defining types for simplicity type File = std::fs::File; @@ -177,21 +177,21 @@ pub fn rectify_extension(mut path: PathBuf, compress: &bool) -> Result pub fn append_umi_to_path(path: &Path) -> PathBuf { let path_str = path.as_os_str().to_string_lossy(); - let new_path_str: Cow<'_, str>; - - if path_str.contains('\\') || path_str.contains('/') { + let new_path_str = if path_str.contains('\\') || path_str.contains('/') { // Path group: Match everything until a forward or backward slash not followed by a forward or backward slash non-greedy (*?) // Stem group: Match literal dot zero or one time, and everything thereafter that is not a dot, yet followed by a literal dot. // Extension group: Now match whatever is still left until the end $. let re = Regex::new(r"(?P^.*(?:\\|/))[^/\\]*?(?P\.?[^\.]+)\.(?P.*)$") .unwrap(); - new_path_str = re.replace(&path_str, "${path}${stem}_with_UMIs.${extension}"); + let new_path_str = re.replace(&path_str, "${path}${stem}_with_UMIs.${extension}"); + new_path_str } else { // Simplified regex for the cases when the file name is given without any preceding path. 
let re = Regex::new(r"(?P^\.?[^\.]+)\.(?P.*)$").unwrap(); - new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); - } + let new_path_str = re.replace(&path_str, "${stem}_with_UMIs.${extension}"); + new_path_str + }; PathBuf::from(new_path_str.to_string()) } diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs index 6a3441b..0342cd2 100644 --- a/tests/integration_tests_external.rs +++ b/tests/integration_tests_external.rs @@ -262,7 +262,7 @@ fn external_succeeds_with_existing_output_file_and_force() { temp_dir .child("read2_out.fq") - .assert(predicate::path::missing()); + .assert(predicate::path::exists()); temp_dir.close().unwrap(); } From 2263a22443b50196857dc0f9a9a68b0dfb8ca5f3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 13 Jul 2023 13:58:11 +0200 Subject: [PATCH 53/72] Create validated output files to be used in tests and correct header glitch in input. --- tests/integration_tests_main.rs | 17 +++++++++- tests/results/correct_read1.fq | 40 ++++++++++++++++++++++++ tests/results/correct_read2.fq | 40 ++++++++++++++++++++++++ tests/results/corrected_read1.fq | 40 ++++++++++++++++++++++++ tests/results/corrected_read2.fq | 40 ++++++++++++++++++++++++ tests/results/delim_underscore_read1.fq | 40 ++++++++++++++++++++++++ tests/results/delim_underscore_read2.fq | 40 ++++++++++++++++++++++++ tests/results/umi_read2_switch_read1.fq | 40 ++++++++++++++++++++++++ tests/results/umi_read2_switch_read2.fq | 40 ++++++++++++++++++++++++ tests/seqdata/read1.fq | 20 ++++++------ tests/seqdata/read1.fq.gz | Bin 583 -> 612 bytes tests/seqdata/read2.fq | 20 ++++++------ tests/seqdata/read2.fq.gz | Bin 618 -> 647 bytes 13 files changed, 356 insertions(+), 21 deletions(-) create mode 100644 tests/results/correct_read1.fq create mode 100644 tests/results/correct_read2.fq create mode 100644 tests/results/corrected_read1.fq create mode 100644 tests/results/corrected_read2.fq create mode 100644 
tests/results/delim_underscore_read1.fq create mode 100644 tests/results/delim_underscore_read2.fq create mode 100644 tests/results/umi_read2_switch_read1.fq create mode 100644 tests/results/umi_read2_switch_read2.fq diff --git a/tests/integration_tests_main.rs b/tests/integration_tests_main.rs index b6a322a..cbe1b98 100644 --- a/tests/integration_tests_main.rs +++ b/tests/integration_tests_main.rs @@ -5,13 +5,28 @@ use std::process::Command; #[path = "auxiliary.rs"] mod auxiliary; -// Define the integration tests #[test] fn main_without_arguments_prints_help() { let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); + // Clap prints help to stderr in this case, but to stdout with -h or --help. cmd.assert() .success() .stderr(predicate::str::contains("Usage:")) .stderr(predicate::str::contains("Commands:")) .stderr(predicate::str::contains("Options:")); } + +#[test] +fn main_help_prints_help() { + let mut cmd = Command::cargo_bin(assert_cmd::crate_name!()).unwrap(); + + // Clap prints help to stdout with -h or --help. 
+ for help in &["-h", "--help"] { + cmd.arg(help) + .assert() + .success() + .stdout(predicate::str::contains("Usage:")) + .stdout(predicate::str::contains("Commands:")) + .stdout(predicate::str::contains("Options:")); + } +} diff --git a/tests/results/correct_read1.fq b/tests/results/correct_read1.fq new file mode 100644 index 0000000..9345d26 --- /dev/null +++ b/tests/results/correct_read1.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016:CCTGAGACC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016:AGACATGAC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGAC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016:TGGACGCAC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016:GCCTAAACG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCTTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016:AATTGAAGT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFF:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016:AACAACAGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT 
+TCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031:TCACTTATT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031:GATATGAGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031:CTAAATTGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031:TACCAAGGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/correct_read2.fq b/tests/results/correct_read2.fq new file mode 100644 index 0000000..22ca837 --- /dev/null +++ b/tests/results/correct_read2.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016:CCTGAGACC 3:N:0:GCTTCAGGGT+AAGGTAGCGT +ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016:AGACATGAC 3:N:0:GCTTCAGGGT+AAGGTAGCGT 
+AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016:TGGACGCAC 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT ++ +FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016:GCCTAAACG 3:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016:AATTGAAGT 3:N:0:GCTTCAGGGT+AAGGTAGCGT +CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA ++ +F:FFFFF:FFFFFFFFFFFFFF::F:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFF:FF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016:AACAACAGA 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:,FFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031:TCACTTATT 3:N:0:GCTTCAGGGT+AAGGTAGCGT +TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC ++ +FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFF,FFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031:GATATGAGG 3:N:0:GCTTCAGGGT+AAGGTAGCGT +GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:5249:1031:CTAAATTGG 3:N:0:GCTTCAGGGT+AAGGTAGCGT +GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031:TACCAAGGA 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:,F:FFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/corrected_read1.fq b/tests/results/corrected_read1.fq new file mode 100644 index 0000000..9345d26 --- /dev/null +++ b/tests/results/corrected_read1.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016:CCTGAGACC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016:AGACATGAC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGAC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016:TGGACGCAC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016:GCCTAAACG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCTTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016:AATTGAAGT 
1:N:0:GCTTCAGGGT+AAGGTAGCGT +GCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFF:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016:AACAACAGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031:TCACTTATT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031:GATATGAGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031:CTAAATTGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031:TACCAAGGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/corrected_read2.fq b/tests/results/corrected_read2.fq new file mode 100644 index 0000000..c947eb1 --- /dev/null +++ b/tests/results/corrected_read2.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016:CCTGAGACC 2:N:0:GCTTCAGGGT+AAGGTAGCGT 
+ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016:AGACATGAC 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016:TGGACGCAC 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT ++ +FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016:GCCTAAACG 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016:AATTGAAGT 2:N:0:GCTTCAGGGT+AAGGTAGCGT +CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA ++ +F:FFFFF:FFFFFFFFFFFFFF::F:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFF:FF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016:AACAACAGA 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:,FFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031:TCACTTATT 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC ++ +FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFF,FFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:2446:1031:GATATGAGG 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031:CTAAATTGG 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031:TACCAAGGA 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:,F:FFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/delim_underscore_read1.fq b/tests/results/delim_underscore_read1.fq new file mode 100644 index 0000000..4616550 --- /dev/null +++ b/tests/results/delim_underscore_read1.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016_CCTGAGACC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016_AGACATGAC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGAC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016_TGGACGCAC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:31620:1016_GCCTAAACG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCTTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016_AATTGAAGT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFF:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016_AACAACAGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031_TCACTTATT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031_GATATGAGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031_CTAAATTGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031_TACCAAGGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTA ++ 
+F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/delim_underscore_read2.fq b/tests/results/delim_underscore_read2.fq new file mode 100644 index 0000000..8abb80d --- /dev/null +++ b/tests/results/delim_underscore_read2.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016_CCTGAGACC 3:N:0:GCTTCAGGGT+AAGGTAGCGT +ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016_AGACATGAC 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016_TGGACGCAC 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT ++ +FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016_GCCTAAACG 3:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016_AATTGAAGT 3:N:0:GCTTCAGGGT+AAGGTAGCGT +CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA ++ +F:FFFFF:FFFFFFFFFFFFFF::F:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFF:FF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016_AACAACAGA 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG ++ 
+FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:,FFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031_TCACTTATT 3:N:0:GCTTCAGGGT+AAGGTAGCGT +TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC ++ +FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFF,FFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031_GATATGAGG 3:N:0:GCTTCAGGGT+AAGGTAGCGT +GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031_CTAAATTGG 3:N:0:GCTTCAGGGT+AAGGTAGCGT +GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF: +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031_TACCAAGGA 3:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA ++ +FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:,F:FFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/umi_read2_switch_read1.fq b/tests/results/umi_read2_switch_read1.fq new file mode 100644 index 0000000..e1bea6b --- /dev/null +++ b/tests/results/umi_read2_switch_read1.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016:ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +AATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:25581:1016:AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGAC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016:AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016:GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCTTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016:CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +GCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATG ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFF:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016:AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGC ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:2320:1031:TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031:GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031:GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCT ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031:AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA 1:N:0:GCTTCAGGGT+AAGGTAGCGT +TTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTA ++ +F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/results/umi_read2_switch_read2.fq b/tests/results/umi_read2_switch_read2.fq new file mode 100644 index 0000000..51367a3 --- /dev/null +++ b/tests/results/umi_read2_switch_read2.fq @@ -0,0 +1,40 @@ +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016:ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG 2:N:0:GCTTCAGGGT+AAGGTAGCGT +CCTGAGACC ++ +FFFFFFFFF 
+@SCILIFELAB:500:NGISTLM:1:1101:25581:1016:AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AGACATGAC ++ +:FFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016:AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TGGACGCAC ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016:GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GCCTAAACG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016:CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AATTGAAGT ++ +FFFFF:FFF +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016:AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG 2:N:0:GCTTCAGGGT+AAGGTAGCGT +AACAACAGA ++ +FFFFFFF:F +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031:TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TCACTTATT ++ +:F:FFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031:GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT 2:N:0:GCTTCAGGGT+AAGGTAGCGT +GATATGAGG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031:GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG 2:N:0:GCTTCAGGGT+AAGGTAGCGT +CTAAATTGG ++ +FFFFFFFFF +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031:AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA 2:N:0:GCTTCAGGGT+AAGGTAGCGT +TACCAAGGA ++ +FFFFFFFFF diff --git a/tests/seqdata/read1.fq b/tests/seqdata/read1.fq index d5efacd..4fc3835 100644 --- a/tests/seqdata/read1.fq +++ b/tests/seqdata/read1.fq @@ -1,40 +1,40 @@ -@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 
+@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 1:N:0:GCTTCAGGGT+AAGGTAGCGT AATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGG + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 1:N:0:GCTTCAGGGT+AAGGTAGCGT TAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGAC + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 1:N:0:GCTTCAGGGT+AAGGTAGCGT GTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGT + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 1:N:0:GCTTCAGGGT+AAGGTAGCGT GGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCTTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAA + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 1:N:0:GCTTCAGGGT+AAGGTAGCGT GCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATG + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFF:FFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 1:N:0:GCTTCAGGGT+AAGGTAGCGT TCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGC + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 1:N:0:GCTTCAGGGT+AAGGTAGCGT 
TGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTT + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 1:N:0:GCTTCAGGGT+AAGGTAGCGT TCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTT + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 1:N:0:GCTTCAGGGT+AAGGTAGCGT TGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCT + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 1:N:0:GCTTCAGGGT+AAGGTAGCGT TTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTA + F#FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/seqdata/read1.fq.gz b/tests/seqdata/read1.fq.gz index 905bb435be0fe0dffe1b37b066be0c69281df4fc..895d2e907ee25c2b9860c935fd7da108a0818a7b 100644 GIT binary patch literal 612 zcmV-q0-OCGiwFn{Z!KX)Lq5Qd?nGjn3^{tuBP~kxl3FMCYlPzF~#CpJc&M9%!;ALbi^ShI^5@#p>uq2 zBU(n8HLGU-zJLCw*fAr>T(kiJL<C2{T7eFz}oM(*`%CpVvQBo3!25vana03=9SoBo)Gi_#*le-xyE+2jzFV z)SrXP%i>(kv{vM#~VI33S5rQeJu*O%}7qmYyp~)qDda_=zUZ3IG7Aog5ti literal 583 zcmV-N0=WGjiwFqzH>_j;19D|yWHBygaRAL$%TB{E5WMF%IC9CWRg)9TimoaZF6{q* z3p+DTC4_jWfYb^(c4Awe$Bub;_0#!ud^+3Xy6gM(B~Gt-eqM)l82W0KWtrFN?K`vd zaQs=sFtfDy8G^Ucl`ff_wY6F zako1R0uHPJoB$7Wz*`LV0URGr#qQ2m;v`<9h^l176HYAO{gO{f2-qQv(i@3b^AWKcqeHY5R8;r<-n^|M^=G(;ex42u_!ilWaLe{zODfJ-&%DN*#SGu_S~Ff&!4ftWH}qyHIESgi V*Cp|{k^V&ibZ-XkVcNF|001hJ6wv?x diff --git a/tests/seqdata/read2.fq b/tests/seqdata/read2.fq index 
92154b5..5b77ae9 100644 --- a/tests/seqdata/read2.fq +++ b/tests/seqdata/read2.fq @@ -1,40 +1,40 @@ -@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 +@SCILIFELAB:500:NGISTLM:1:1101:19994:1016 3:N:0:GCTTCAGGGT+AAGGTAGCGT ATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTG + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFF:FFFF:FFFFF:FFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 +@SCILIFELAB:500:NGISTLM:1:1101:25581:1016 3:N:0:GCTTCAGGGT+AAGGTAGCGT AAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAAT + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 +@SCILIFELAB:500:NGISTLM:1:1101:31006:1016 3:N:0:GCTTCAGGGT+AAGGTAGCGT AACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACT + FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 +@SCILIFELAB:500:NGISTLM:1:1101:31620:1016 3:N:0:GCTTCAGGGT+AAGGTAGCGT GTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTA + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 +@SCILIFELAB:500:NGISTLM:1:1101:31693:1016 3:N:0:GCTTCAGGGT+AAGGTAGCGT CACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGA + F:FFFFF:FFFFFFFFFFFFFF::F:FFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFF:FFFFFF:FF:FFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 +@SCILIFELAB:500:NGISTLM:1:1101:32832:1016 3:N:0:GCTTCAGGGT+AAGGTAGCGT AGGAAAGGATACTCGTTATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGG + 
FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFF:,FFFFFFFFFFFFFFF: -@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 +@SCILIFELAB:500:NGISTLM:1:1101:2320:1031 3:N:0:GCTTCAGGGT+AAGGTAGCGT TTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCAGATTTGTCGTCACAGGTTGCGCCGCCAAAACGGCGGCTACAGTAACTTTTCCCAGC + FFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFF:FFFFFFFFFFFFF,FFFFF -@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 +@SCILIFELAB:500:NGISTLM:1:1101:2446:1031 3:N:0:GCTTCAGGGT+AAGGTAGCGT GAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGT + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF -@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 +@SCILIFELAB:500:NGISTLM:1:1101:5249:1031 3:N:0:GCTTCAGGGT+AAGGTAGCGT GAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAG + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F,FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF: -@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 +@SCILIFELAB:500:NGISTLM:1:1101:10438:1031 3:N:0:GCTTCAGGGT+AAGGTAGCGT AATCGTTAGTTGATGGCGAAAGGTCGCAAAGTAAGAGCTTCTCGAACTGCGCAAGGATAGGTCGAATTTTCTCATTTTCCGCCAGCAGTCCACTTCGATTTA + FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:F:,F:FFFFFFFFFFFF:FF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF diff --git a/tests/seqdata/read2.fq.gz b/tests/seqdata/read2.fq.gz index 4f9ed6d219275aebf7cdf1867820a3fa532940fe..eb2813c7e470194fc0efcd00340af2e1de61f2a7 100644 GIT binary patch literal 647 zcmV;20(ku&iwFo9kyme;cD`|@U|cR&9tLm7rX9?P=KC2ISp zsl1iGn7XT&nLkUsTujY7i9VMoYO1bLb&bIoin*%jca!)zdU7Hca7=~M1XE%;)z=#g!=-#_l&}3+x|jP<#@+5PY=9yNhd=h*(nnpWOJ=1WP2?7KU%Yz32LV;f(3sMp8gd)aTUQO5w z=+dU8nvmH=F-SgOLS}tp@}RSLTB$DGE;;iF9!lLHOww%nxNo(POQ8 zt7Wdim(Ypv$C;GlS|Il(x0nq5JRRCl_@zdV%rn;X7k+}}C6ZR6)RQt*ssV8pg@w_4 hYu??wsKHYyw}-Bt0>fB;wKas-{RjW~jN{J=002l=F>C+; literal 618 zcmV-w0+szAiwFnU9#A{($oK~NrRnm#9@s2y 
z1C>5>4Isd5nLtny9R5W%Q@!p47h^9=z6!}SOQY1HV790mB0?yjkZ;@%I(@zE`*nt3 zlY1Pe+&d-o6&S`C>_LV8G1%az*0?g)F^S#HH9Ac)l#zQ5&z`*M-mkpCNUye8%akmKo E0DvtjivR!s From 7e94bb99c0354b6c8f0be7a34fe68246db2744f5 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 13 Jul 2023 18:11:33 +0200 Subject: [PATCH 54/72] Create TestResults struct to hold the paths to the validated program outputs. --- tests/auxiliary.rs | 45 +++++++++++++++++++++++++++-- tests/integration_tests_external.rs | 16 +++++----- 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index 78ffd5a..f02df2e 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -24,8 +24,22 @@ pub struct TestFiles { pub new_output_read2_gz: PathBuf, } +pub struct TestOutput { + // Struct to hold the paths to validated output files. + pub correct_read1: PathBuf, + pub correct_read2: PathBuf, + pub corrected_read1: PathBuf, + pub corrected_read2: PathBuf, + pub delim_underscore_read1: PathBuf, + pub delim_underscore_read2: PathBuf, + pub umi_read2_switch_read1: PathBuf, + pub umi_read2_switch_read2: PathBuf, +} + #[allow(dead_code)] -pub fn setup_integration_test() -> (Command, TempDir, TestFiles) { +pub fn setup_integration_test( + with_results: bool, +) -> (Command, TempDir, TestFiles, Option) { // Get the name of the binary (umi-transfer) let cmd = Command::cargo_bin(assert_cmd::crate_name!()) .expect("Failed to pull binary name from Cargo.toml at compile time."); @@ -41,6 +55,17 @@ pub fn setup_integration_test() -> (Command, TempDir, TestFiles) { ) .expect("Failed to copy test data to temporary directory."); + if with_results { + temp_dir + .copy_from( + std::env::current_dir() + .expect("Failed to get directory") + .join("./tests/results"), + &["*.fq"], + ) + .expect("Failed to copy result data to temporary directory."); + }; + let test_files = TestFiles { read1: temp_dir.path().join("read1.fq"), read1_gz: temp_dir.path().join("read1.fq.gz"), @@ -57,5 +82,21 @@ pub fn 
setup_integration_test() -> (Command, TempDir, TestFiles) { new_output_read2_gz: temp_dir.path().join("read2_out.fq.gz"), }; - return (cmd, temp_dir, test_files); + let test_output = if with_results { + let temp = TestOutput { + correct_read1: temp_dir.path().join("correct_read1.fq"), + correct_read2: temp_dir.path().join("correct_read2.fq"), + corrected_read1: temp_dir.path().join("corrected_read1.fq"), + corrected_read2: temp_dir.path().join("corrected_read2.fq"), + delim_underscore_read1: temp_dir.path().join("delim_underscore_read1.fq"), + delim_underscore_read2: temp_dir.path().join("delim_underscore_read2.fq"), + umi_read2_switch_read1: temp_dir.path().join("umi_read2_switch_read1.fq"), + umi_read2_switch_read2: temp_dir.path().join("umi_read2_switch_read2.fq"), + }; + Some(temp) + } else { + None + }; + + return (cmd, temp_dir, test_files, test_output); } diff --git a/tests/integration_tests_external.rs b/tests/integration_tests_external.rs index 0342cd2..5e185e9 100644 --- a/tests/integration_tests_external.rs +++ b/tests/integration_tests_external.rs @@ -23,7 +23,7 @@ fn external_fails_without_arguments() { #[test] fn external_with_minimal_arguments_plain() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); cmd.arg("external") .arg("--in") .arg(test_files.read1) @@ -51,7 +51,7 @@ fn external_with_minimal_arguments_plain() { #[test] fn external_with_minimal_arguments_compressed() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); cmd.arg("external") .arg("--in") .arg(test_files.read1_gz) @@ -80,7 +80,7 @@ fn external_with_minimal_arguments_compressed() { #[test] fn external_with_output_no_gz_suffix_compression() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, 
temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); cmd.arg("external") .arg("--in") .arg(test_files.read1_gz) @@ -124,7 +124,7 @@ fn external_with_output_no_gz_suffix_compression() { #[test] fn external_with_output_gz_suffix_no_compression() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); cmd.arg("external") .arg("--in") .arg(test_files.read1_gz) @@ -167,7 +167,7 @@ fn external_with_output_gz_suffix_no_compression() { #[test] fn external_fails_with_nonexisting_output_file() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); cmd.arg("external") .arg("--in") .arg(test_files.read1_gz) @@ -196,7 +196,7 @@ fn external_fails_with_nonexisting_output_file() { #[test] fn external_fails_with_existing_output_file_and_no_force() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); // create an existing output file temp_dir @@ -233,7 +233,7 @@ fn external_fails_with_existing_output_file_and_no_force() { #[test] fn external_succeeds_with_existing_output_file_and_force() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); // create an existing output file temp_dir @@ -269,7 +269,7 @@ fn external_succeeds_with_existing_output_file_and_force() { #[test] fn external_fails_on_read_id_mismatch() { - let (mut cmd, temp_dir, test_files) = auxiliary::setup_integration_test(); + let (mut cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); cmd.arg("external") .arg("--in") .arg(test_files.read1_gz) From 8ff157065732e1ac87857984099536ab528c666d 
Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 13 Jul 2023 23:03:32 +0200 Subject: [PATCH 55/72] Wrote an auxiliary function verify_file_contents() for the tests, so I can test if the output looks as expected. --- tests/auxiliary.rs | 26 ++++++++++- ...integration_tests_external_filecontents.rs | 46 +++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 tests/integration_tests_external_filecontents.rs diff --git a/tests/auxiliary.rs b/tests/auxiliary.rs index f02df2e..c6720b3 100644 --- a/tests/auxiliary.rs +++ b/tests/auxiliary.rs @@ -1,8 +1,9 @@ +use anyhow::{anyhow, Result}; use assert_cmd::Command; use assert_fs::fixture::{NamedTempFile, TempDir}; use assert_fs::prelude::*; +use predicates::prelude::*; use std::path::PathBuf; - // since those are just needed for the tests, I didn't put it in src. Therefore, using this module is not detected and dead_code warnings issued. #[derive()] @@ -24,6 +25,8 @@ pub struct TestFiles { pub new_output_read2_gz: PathBuf, } +#[derive()] +#[allow(dead_code)] pub struct TestOutput { // Struct to hold the paths to validated output files. pub correct_read1: PathBuf, @@ -100,3 +103,24 @@ pub fn setup_integration_test( return (cmd, temp_dir, test_files, test_output); } + +// Function to compare two files, used to test if the program output matches the reference. 
+#[allow(dead_code)] +pub fn verify_file_contents(test_file: &PathBuf, reference_file: &PathBuf) -> Result { + let test_file_content = std::fs::read_to_string(&test_file) + .map_err(|err| anyhow!("Failed to read test file: {}", err))?; + let reference_file_content = std::fs::read_to_string(&reference_file) + .map_err(|err| anyhow!("Failed to read reference file: {}", err))?; + + let predicate_fn = predicate::str::diff(reference_file_content); + + if predicate_fn.eval(&test_file_content) { + Ok(true) + } else { + Err(anyhow!( + "{} and {} did not match!", + reference_file.file_name().unwrap().to_string_lossy(), + test_file.file_name().unwrap().to_string_lossy() + )) + } +} diff --git a/tests/integration_tests_external_filecontents.rs b/tests/integration_tests_external_filecontents.rs new file mode 100644 index 0000000..5d85730 --- /dev/null +++ b/tests/integration_tests_external_filecontents.rs @@ -0,0 +1,46 @@ +use assert_fs::prelude::*; +use auxiliary::verify_file_contents; +use predicates::prelude::*; +use std::error::Error; + +#[path = "auxiliary.rs"] +mod auxiliary; + +type TestResult = Result<(), Box>; + +#[test] +fn external_produces_correct_output() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + verify_file_contents( + &temp_dir.child("read1_with_UMIs.fq").to_path_buf(), + &reference.correct_read1, + )?; + + verify_file_contents( + &temp_dir.child("read2_with_UMIs.fq").to_path_buf(), + &reference.correct_read2, + )?; + + temp_dir.close().unwrap(); + Ok(()) +} From 
7a84a3a1fc0321cb245f77ed054b10a1ef4d14dc Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Thu, 13 Jul 2023 23:27:38 +0200 Subject: [PATCH 56/72] Finished the integration tests to test file output. --- ...integration_tests_external_filecontents.rs | 144 +++++++++++++++++- 1 file changed, 143 insertions(+), 1 deletion(-) diff --git a/tests/integration_tests_external_filecontents.rs b/tests/integration_tests_external_filecontents.rs index 5d85730..8ba514a 100644 --- a/tests/integration_tests_external_filecontents.rs +++ b/tests/integration_tests_external_filecontents.rs @@ -8,6 +8,32 @@ mod auxiliary; type TestResult = Result<(), Box>; +// First two tests to test that my tests will work. + +#[test] +fn testing_file_verification_succeeds() -> TestResult { + let (mut _cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); + + // the same file should be identical + verify_file_contents(&test_files.read1, &test_files.read1)?; + + temp_dir.close()?; + Ok(()) +} + +#[test] +#[should_panic(expected = "read2.fq and read1.fq did not match!")] +fn testing_file_verification_fails() { + let (mut _cmd, temp_dir, test_files, _test_output) = auxiliary::setup_integration_test(false); + + // the same file should be identical + verify_file_contents(&test_files.read1, &test_files.read2).unwrap(); + + temp_dir.close().unwrap(); +} + +// Yep, verify_file_contents() does its job. Ready to rumble! 
+ #[test] fn external_produces_correct_output() -> TestResult { let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); @@ -41,6 +67,122 @@ fn external_produces_correct_output() -> TestResult { &reference.correct_read2, )?; - temp_dir.close().unwrap(); + temp_dir.close()?; + Ok(()) +} + +#[test] +fn external_corrects_read_numbers_in_output() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi) + .arg("--correct_numbers"); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + verify_file_contents( + &temp_dir.child("read1_with_UMIs.fq").to_path_buf(), + &reference.corrected_read1, + )?; + + verify_file_contents( + &temp_dir.child("read2_with_UMIs.fq").to_path_buf(), + &reference.corrected_read2, + )?; + + temp_dir.close()?; + Ok(()) +} + +#[test] +fn external_underscore_delimiter() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.read2) + .arg("--umi") + .arg(test_files.umi) + .arg("--delim") + .arg("_"); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("read2_with_UMIs.fq") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + verify_file_contents( + &temp_dir.child("read1_with_UMIs.fq").to_path_buf(), + &reference.delim_underscore_read1, + )?; + + verify_file_contents( + 
&temp_dir.child("read2_with_UMIs.fq").to_path_buf(), + &reference.delim_underscore_read2, + )?; + + temp_dir.close()?; + Ok(()) +} + +// Not really a serious test, but one can also integrate the read sequence into the UMI header if needed ;-) + +#[test] +fn external_switch_umi_and_read2() -> TestResult { + let (mut cmd, temp_dir, test_files, test_output) = auxiliary::setup_integration_test(true); + cmd.arg("external") + .arg("--in") + .arg(test_files.read1) + .arg("--in2") + .arg(test_files.umi) + .arg("--umi") + .arg(test_files.read2); + + cmd.assert().success(); //further assertions have been tested in other tests + + temp_dir + .child("read1_with_UMIs.fq") + .assert(predicate::path::exists()); + + temp_dir + .child("umi_with_UMIs.fq") + .assert(predicate::path::exists()); + + let reference = test_output.unwrap(); + + verify_file_contents( + &temp_dir.child("read1_with_UMIs.fq").to_path_buf(), + &reference.umi_read2_switch_read1, + )?; + + verify_file_contents( + &temp_dir.child("umi_with_UMIs.fq").to_path_buf(), + &reference.umi_read2_switch_read2, + )?; + + temp_dir.close()?; Ok(()) } From 053f147a1911e8acf7fb54aa940b3a7951c64837 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 14 Jul 2023 21:32:04 +0200 Subject: [PATCH 57/72] Create new Testing workflow comprising Clippy and Tarpaulin. 
--- .github/workflows/clippy.yml | 48 ----------- .../workflows/{dockerhub.yml => release.yml} | 0 .github/workflows/rust.yml | 21 ----- .github/workflows/testing.yml | 86 +++++++++++++++++++ README.md | 2 +- 5 files changed, 87 insertions(+), 70 deletions(-) delete mode 100644 .github/workflows/clippy.yml rename .github/workflows/{dockerhub.yml => release.yml} (100%) delete mode 100644 .github/workflows/rust.yml create mode 100644 .github/workflows/testing.yml diff --git a/.github/workflows/clippy.yml b/.github/workflows/clippy.yml deleted file mode 100644 index 03b0edd..0000000 --- a/.github/workflows/clippy.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Clippy is a tool that runs a bunch of tests on the Rust code to catch common mistakes and discouraged patterns. - -name: Analyze with Rust Clippy - -on: - push: - branches: - - main - - dev - pull_request: - branches: - - main - - dev - -jobs: - rust-clippy-analyze: - name: Run rust-clippy analyzing - runs-on: ubuntu-latest - permissions: - contents: read - security-events: write - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Install Rust toolchain - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af #@v1 - with: - profile: minimal - toolchain: stable - components: clippy - override: true - - - name: Install required cargo - run: cargo install clippy-sarif sarif-fmt - - - name: Run rust-clippy - run: - cargo clippy - --all-features - --message-format=json | clippy-sarif | tee rust-clippy-results.sarif | sarif-fmt - continue-on-error: true - - - name: Upload analysis results to GitHub - uses: github/codeql-action/upload-sarif@v1 - with: - sarif_file: rust-clippy-results.sarif - wait-for-processing: true \ No newline at end of file diff --git a/.github/workflows/dockerhub.yml b/.github/workflows/release.yml similarity index 100% rename from .github/workflows/dockerhub.yml rename to .github/workflows/release.yml diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml 
deleted file mode 100644 index d79b62c..0000000 --- a/.github/workflows/rust.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Test successful compilation on Rust - -on: [push] - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - name: Install Rust - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - target: wasm32-unknown-unknown - - name: Build - uses: actions-rs/cargo@v1 - with: - command: build diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml new file mode 100644 index 0000000..f146c65 --- /dev/null +++ b/.github/workflows/testing.yml @@ -0,0 +1,86 @@ +name: Test and verify Rust contributions + + +on: +# Run if contributions to dev or main are proposed. + pull_request: + branches: + - main + - dev + types: + - opened + paths: + - '**.rs' + +# Run if Rust code has been changed. Readme and other changes are not relevant. + push: + paths: + - '**.rs' + +env: + CARGO_TERM_COLOR: always + +jobs: + clippy: + + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + components: clippy + target: wasm32-unknown-unknown + + - name: Run the unit and integration tests + uses: actions-rs/cargo@v1 + with: + command: test + + - name: Install rust-clippy + run: cargo install clippy-sarif sarif-fmt + + - name: Run rust-clippy + run: + cargo clippy + --all-features + --message-format=json | clippy-sarif | tee rust-clippy-results.sarif | sarif-fmt + continue-on-error: true + + - name: Create an artifact from clippy results + uses: actions/upload-artifact@v3 + with: + name: ClippyResults + path: rust-clippy-results.sarif + + - name: Upload clippy analysis results to GitHub + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: rust-clippy-results.sarif + wait-for-processing: true + + tarpaulin: + name: Determine test coverage with Tarpaulin + runs-on: 
ubuntu-latest + container: + image: xd009642/tarpaulin:develop-nightly + options: --security-opt seccomp=unconfined + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Generate code coverage + run: | + cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml + + - name: Upload to codecov.io + uses: codecov/codecov-action@v2 + with: + fail_ci_if_error: true + + diff --git a/README.md b/README.md index 4c19d54..109b6fa 100644 --- a/README.md +++ b/README.md @@ -167,6 +167,6 @@ rm read1.fastq read2.fastq read3.fastq output1.fastq output2.fastq `umi-transfer` is a free and open-source software developed and maintained by scientists of the [Swedish National Genomics Infrastructure](https://ngisweden.scilifelab.se). We gladly welcome suggestions for improvement, bug reports and code contributions. -If you'd like to contribute code, the best way to get started is to create a personal fork of the repository. Subsequently, use a new branch to develop your feature or contribute your bug fix. Ideally, use a code linter like `rust-analyzer` in your code editor. +If you'd like to contribute code, the best way to get started is to create a personal fork of the repository. Subsequently, use a new branch to develop your feature or contribute your bug fix. Ideally, use a code linter like `rust-analyzer` in your code editor and run the tests with `cargo test`. Before developing a new feature, we recommend opening an issue on the main repository to discuss your proposal upfront. Once you're ready, simply open a pull request to the `dev` branch and we'll happily review your changes. Thanks for your interest in contributing to `umi-transfer`! From a2e478556c041a7f9083bd7ce56939d8807782e1 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 14 Jul 2023 22:55:21 +0200 Subject: [PATCH 58/72] Devise a new release action that includes cross-platform builds. Heavily inspired by/copied from Alex Hallam's tidy-viewer release action.
--- .github/workflows/release.yml | 203 ++++++++++++++++++++++++++-------- 1 file changed, 154 insertions(+), 49 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 064e2ce..030101c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,60 +1,165 @@ -name: Publish to Dockerhub +name: Build umi-transfer on: - push: + pull_request: branches: - main - - 'releases/**' - - AddCIpipelines - tags: - - 'v*' #if a push with a version tag like v0.0.2 is recorded + + workflow_dispatch: release: types: [published] jobs: - push_to_registry: + # Thanks to Alex Hallam, from whose tidy-viewer release Action all compilation steps were copied (released under UNLICENSE terms) + # https://github.com/alexhallam/tv + # https://raw.githubusercontent.com/alexhallam/tv/main/.github/workflows/release.yml + build_binaries: if: github.repository == 'SciLifeLab/umi-transfer' - name: Push Docker image to Docker Hub / GitHub Docker Registry - runs-on: ubuntu-latest + name: Build binaries of the software + runs-on: ${{ matrix.os }} + env: + # For some builds, we use cross to test on 32-bit and big-endian + # systems. + CARGO: cargo + # When CARGO is set to CROSS, this is set to `--target matrix.target`. + TARGET_FLAGS: "" + # When CARGO is set to CROSS, TARGET_DIR includes matrix.target. + TARGET_DIR: ./target + # Emit backtraces on panics. + RUST_BACKTRACE: 1 + # Build static releases with PCRE2. 
+ PCRE2_SYS_STATIC: 1 + + strategy: + fail-fast: false + matrix: + build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc] + include: + - build: linux + os: ubuntu-18.04 + rust: nightly + target: x86_64-unknown-linux-musl + - build: linux-arm + os: ubuntu-18.04 + rust: nightly + target: arm-unknown-linux-gnueabihf + - build: macos + os: macOS-11 + rust: nightly + target: x86_64-apple-darwin + - build: win-msvc + os: windows-2019 + rust: nightly + target: x86_64-pc-windows-msvc + - build: win-gnu + os: ubuntu-18.04 + rust: nightly-x86_64-gnu + target: x86_64-pc-windows-gnu + - build: win32-msvc + os: windows-2019 + rust: nightly + target: i686-pc-windows-msvc + steps: - - name: Check out the repo - uses: actions/checkout@v2 - - - name: Change repo name to lowercase - run: | - echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >>${GITHUB_ENV} - - - name: Log in to Docker Hub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - - name: Login to GitHub Container Registry - uses: docker/login-action@v1 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Push dev image - uses: docker/build-push-action@v2 - if: github.event_name == 'push' - with: - push: true - tags: | - ${{ secrets.DOCKERHUB_USERNAME }}/umi-transfer:dev - ghcr.io/${{ env.REPO_LOWERCASE }}:dev - - - name: Push release image - uses: docker/build-push-action@v2 - if: github.event_name == 'release' - with: - push: true - tags: | - ${{ secrets.DOCKERHUB_USERNAME }}/umi-transfer:${{ github.event.release.tag_name }} - ${{ secrets.DOCKERHUB_USERNAME }}/umi-transfer:latest - ghcr.io/${{ env.REPO_LOWERCASE }}:${{ github.event.release.tag_name }} - ghcr.io/${{ env.REPO_LOWERCASE }}:${{ github.sha }} - ghcr.io/${{ env.REPO_LOWERCASE }}:latest + - name: Check out the repo + uses: actions/checkout@v2 + + - name: Set software version (release) + if: github.event_name == 
'release' + run: | + echo "SOFTWARE_VERSION=${{ github.event.release.tag_name }}" >> ${GITHUB_ENV} + + - name: Set software version (other) + if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) + run: | + echo "SOFTWARE_VERSION=${{ github.event.number }}" >> ${GITHUB_ENV} + + - name: Install packages (Ubuntu) + if: matrix.os == 'ubuntu-18.04' + run: | + ci/ubuntu-install-packages + + - name: Install packages (macOS) + if: matrix.os == 'macOS-11' + run: | + ci/macos-install-packages + + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: ${{ matrix.rust }} + profile: minimal + override: true + target: ${{ matrix.target }} + + - name: Use Cross + shell: bash + run: | + cargo install cross + echo "CARGO=cross" >> $GITHUB_ENV + echo "TARGET_FLAGS=--target ${{ matrix.target }}" >> $GITHUB_ENV + echo "TARGET_DIR=./target/${{ matrix.target }}" >> $GITHUB_ENV + + - name: Show command used for Cargo + run: | + echo "cargo command is: ${{ env.CARGO }}" + echo "target flag is: ${{ env.TARGET_FLAGS }}" + echo "target dir is: ${{ env.TARGET_DIR }}" + + - name: Build release binary + run: ${{ env.CARGO }} build --verbose --release ${{ env.TARGET_FLAGS }} + + - name: Strip release binary (linux and macos) + if: matrix.build == 'linux' || matrix.build == 'macos' + run: strip "target/${{ matrix.target }}/release/umi-transfer" + + - name: Strip release binary (arm) + if: matrix.build == 'linux-arm' + run: | + docker run --rm -v \ + "$PWD/target:/target:Z" \ + rustembedded/cross:arm-unknown-linux-gnueabihf \ + arm-linux-gnueabihf-strip \ + /target/arm-unknown-linux-gnueabihf/release/umi-transfer + + - name: Build archive + shell: bash + run: | + #outdir="(ci/cargo-out-dir ${{ env.TARGET_DIR }})" + staging="umi-transfer-${{ env.SOFTWARE_VERSION }}-${{ matrix.target }}" + mkdir -p "$staging"/{complete,doc} + + cp {README.md,LICENSE} "$staging/" + cp -R docs "$staging/docs" + + if [ "${{ matrix.target }}" = "i686-pc-windows-msvc" 
] || [ "${{ matrix.target }}" = "x86_64-pc-windows-gnu" ] || [ "${{ matrix.target }}" = "x86_64-pc-windows-msvc" ]; then + echo "Target is Windows Based" + cp "target/${{ matrix.target }}/release/umi-transfer.exe" "$staging/" + 7z a "$staging.zip" "$staging" + echo "ASSET=$staging.zip" >> $GITHUB_ENV + else + echo "Target is not Windows Based" + cp "target/${{ matrix.target }}/release/umi-transfer" "$staging/" + tar czf "$staging.tar.gz" "$staging" + echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV + fi + + + - name: Create artifact from binary + if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' + uses: actions/upload-artifact@v3 + with: + name: ${{ env.ASSET }} + path: ${{ env.ASSET }} + + - name: Upload release archive + uses: actions/upload-release-asset@v1.0.1 + if: github.event_name == 'release' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ${{ env.ASSET }} + asset_name: ${{ env.ASSET }} + asset_content_type: application/octet-stream From 734c2c79df8ae3e1c3fc3ebcf0ff8f0590f633e3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 14 Jul 2023 22:55:59 +0200 Subject: [PATCH 59/72] Refurbish the Docker image build action. 
--- .github/workflows/container.yml | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 .github/workflows/container.yml diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml new file mode 100644 index 0000000..dcb5163 --- /dev/null +++ b/.github/workflows/container.yml @@ -0,0 +1,61 @@ +name: Publish container images + +on: + push: + branches: + - dev + paths: + - '**.rs' + + release: + types: [published] + + workflow_dispatch: + +jobs: + push_to_registry: + if: github.repository == 'SciLifeLab/umi-transfer' + name: Push Docker image to Docker Hub / GitHub Docker Registry + runs-on: ubuntu-latest + steps: + - name: Check out the repo + uses: actions/checkout@v2 + + - name: Change repo name to lowercase and set environment variables + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV} + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} + + - name: Log in to Docker Hub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Push dev image + uses: docker/build-push-action@v3 + if: github.event_name == 'push' + with: + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/${{ env.REPOTITLE_LOWERCASE }}:dev + ghcr.io/${{ env.REPO_LOWERCASE }}:dev + + - name: Push release image + uses: docker/build-push-action@v3 + if: github.event_name == 'release' + with: + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/${{ env.REPOTITLE_LOWERCASE }}:${{ github.event.release.tag_name }} + ${{ secrets.DOCKERHUB_USERNAME }}/${{ env.REPOTITLE_LOWERCASE }}:latest + ghcr.io/${{ env.REPO_LOWERCASE }}:${{ github.event.release.tag_name }} + ghcr.io/${{ env.REPO_LOWERCASE }}:${{ github.sha }} + 
ghcr.io/${{ env.REPO_LOWERCASE }}:latest From 16099a291fa9b4d05db785a04695110ed5b8eea3 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Fri, 14 Jul 2023 23:38:59 +0200 Subject: [PATCH 60/72] Change EventTriggers to test workflows in my fork. --- .github/workflows/container.yml | 16 ++++++++++------ .github/workflows/release.yml | 6 +++++- .github/workflows/testing.yml | 5 +++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index dcb5163..fe92fb4 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -1,20 +1,24 @@ name: Publish container images on: - push: - branches: - - dev - paths: - - '**.rs' + #push: + # branches: + # - dev + # paths: + # - '**.rs' release: types: [published] workflow_dispatch: + push: + branches: + - BuildAction + jobs: push_to_registry: - if: github.repository == 'SciLifeLab/umi-transfer' + # if: github.repository == 'SciLifeLab/umi-transfer' name: Push Docker image to Docker Hub / GitHub Docker Registry runs-on: ubuntu-latest steps: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 030101c..d20bc75 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,12 +10,16 @@ on: release: types: [published] + push: + branches: + - BuildAction + jobs: # Thanks to Alex Hallam, from whose tidy-viewer release Action all compilation steps were copied (released under UNLICENSE terms) # https://github.com/alexhallam/tv # https://raw.githubusercontent.com/alexhallam/tv/main/.github/workflows/release.yml build_binaries: - if: github.repository == 'SciLifeLab/umi-transfer' + ##if: github.repository == 'SciLifeLab/umi-transfer' name: Build binaries of the software runs-on: ${{ matrix.os }} env: diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index f146c65..a270154 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -14,8 +14,8 @@ 
on: # Run if Rust code has been changed. Readme and other changes are not relevant. push: - paths: - - '**.rs' + # paths: + # - '**.rs' env: CARGO_TERM_COLOR: always @@ -66,6 +66,7 @@ jobs: tarpaulin: name: Determine test coverage with Tarpaulin + if: github.repository == 'SciLifeLab/umi-transfer' runs-on: ubuntu-latest container: image: xd009642/tarpaulin:develop-nightly From 9b7ccdc8ce0282433a82577e7f526e0dd313cc5f Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Sat, 15 Jul 2023 08:59:08 +0200 Subject: [PATCH 61/72] Bugfixes in the GithubAction workflows. Fingers crossed... --- .github/workflows/container.yml | 6 +++--- .github/workflows/release.yml | 20 +++++++++++--------- .github/workflows/testing.yml | 6 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index fe92fb4..aa24cff 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out the repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Change repo name to lowercase and set environment variables run: | @@ -31,13 +31,13 @@ jobs: echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV} - name: Log in to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Login to GitHub Container Registry - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ github.repository_owner }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d20bc75..cead476 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -41,11 +41,11 @@ jobs: build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc] include: - build: linux - os: ubuntu-18.04 + os: ubuntu-latest rust: nightly target: 
x86_64-unknown-linux-musl - build: linux-arm - os: ubuntu-18.04 + os: ubuntu-latest rust: nightly target: arm-unknown-linux-gnueabihf - build: macos @@ -57,7 +57,7 @@ jobs: rust: nightly target: x86_64-pc-windows-msvc - build: win-gnu - os: ubuntu-18.04 + os: ubuntu-latest rust: nightly-x86_64-gnu target: x86_64-pc-windows-gnu - build: win32-msvc @@ -67,7 +67,7 @@ jobs: steps: - name: Check out the repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set software version (release) if: github.event_name == 'release' @@ -80,14 +80,17 @@ jobs: echo "SOFTWARE_VERSION=${{ github.event.number }}" >> ${GITHUB_ENV} - name: Install packages (Ubuntu) - if: matrix.os == 'ubuntu-18.04' + if: matrix.os == 'ubuntu-latest' run: | - ci/ubuntu-install-packages + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + asciidoctor \ + zsh xz-utils liblz4-tool musl-tools - name: Install packages (macOS) if: matrix.os == 'macOS-11' run: | - ci/macos-install-packages + brew install asciidoctor - name: Install Rust uses: actions-rs/toolchain@v1 @@ -130,7 +133,6 @@ jobs: - name: Build archive shell: bash run: | - #outdir="(ci/cargo-out-dir ${{ env.TARGET_DIR }})" staging="umi-transfer-${{ env.SOFTWARE_VERSION }}-${{ matrix.target }}" mkdir -p "$staging"/{complete,doc} @@ -151,7 +153,7 @@ jobs: - name: Create artifact from binary - if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' + #if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' uses: actions/upload-artifact@v3 with: name: ${{ env.ASSET }} diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index a270154..2b80b9e 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -27,7 +27,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Rust 
uses: actions-rs/toolchain@v1 @@ -66,20 +66,20 @@ jobs: tarpaulin: name: Determine test coverage with Tarpaulin - if: github.repository == 'SciLifeLab/umi-transfer' runs-on: ubuntu-latest container: image: xd009642/tarpaulin:develop-nightly options: --security-opt seccomp=unconfined steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Generate code coverage run: | cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml - name: Upload to codecov.io + if: github.repository == 'SciLifeLab/umi-transfer' uses: codecov/codecov-action@v2 with: fail_ci_if_error: true From c71aeb25cc3301674a91662913966c09c5eaab63 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 17 Jul 2023 13:13:37 +0200 Subject: [PATCH 62/72] Slight tweaks to the GithubActions and the Dockerfile. --- .github/workflows/container.yml | 2 +- .github/workflows/release.yml | 12 ++++++------ Dockerfile | 4 +++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index aa24cff..784305f 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -18,7 +18,7 @@ on: jobs: push_to_registry: - # if: github.repository == 'SciLifeLab/umi-transfer' + if: ( github.repository == 'SciLifeLab/umi-transfer' || github.repository == 'MatthiasZepper/umi-transfer') name: Push Docker image to Docker Hub / GitHub Docker Registry runs-on: ubuntu-latest steps: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index cead476..ff4e213 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -74,11 +74,6 @@ jobs: run: | echo "SOFTWARE_VERSION=${{ github.event.release.tag_name }}" >> ${GITHUB_ENV} - - name: Set software version (other) - if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) - run: | - echo "SOFTWARE_VERSION=${{ github.event.number }}" >> ${GITHUB_ENV} - - 
name: Install packages (Ubuntu) if: matrix.os == 'ubuntu-latest' run: | @@ -130,11 +125,16 @@ jobs: arm-linux-gnueabihf-strip \ /target/arm-unknown-linux-gnueabihf/release/umi-transfer + - name: Set software version (other) + if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) + run: | + echo "SOFTWARE_VERSION=${{ env.CARGO_PKG_VERSION }}" >> ${GITHUB_ENV} + - name: Build archive shell: bash run: | staging="umi-transfer-${{ env.SOFTWARE_VERSION }}-${{ matrix.target }}" - mkdir -p "$staging"/{complete,doc} + mkdir "$staging" cp {README.md,LICENSE} "$staging/" cp -R docs "$staging/docs" diff --git a/Dockerfile b/Dockerfile index 4d1e92f..1cadd47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,4 +15,6 @@ FROM debian:bullseye-slim as runner WORKDIR /root COPY --from=buildenv /usr/app/src/target/release/ /usr/local/bin/ RUN chmod +x /usr/local/bin/umi-transfer -CMD /usr/local/bin/umi-transfer + +ENV WHISPER_MODEL=$WHISPER_MODEL +CMD [ "/bin/bash", "-l","-c"] From dc5639535c73bcca404c781d028bd29dc038d17a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 17 Jul 2023 14:34:35 +0200 Subject: [PATCH 63/72] Switch to ructions from actions-rs, since the latter seem to be unmaintained. 
--- .github/workflows/release.yml | 27 ++++++--- .github/workflows/testing.yml | 4 +- Cargo.lock | 103 ++++++++++++++++------------------ 3 files changed, 69 insertions(+), 65 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ff4e213..3cd8041 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,6 +40,10 @@ jobs: matrix: build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc] include: + - build: linux + os: ubuntu-latest + rust: nightly + target: x86_64-unknown-linux-gnu - build: linux os: ubuntu-latest rust: nightly @@ -73,6 +77,11 @@ jobs: if: github.event_name == 'release' run: | echo "SOFTWARE_VERSION=${{ github.event.release.tag_name }}" >> ${GITHUB_ENV} + + - name: Set software version (PR) + if: matrix.os == 'ubuntu-latest' && (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' || github.event_name == 'push' ) + run: | + echo "SOFTWARE_VERSION=$(grep -Po -m 1 '(?<=version\s=\s\")[^\"]+' Cargo.toml)" >> ${GITHUB_ENV} - name: Install packages (Ubuntu) if: matrix.os == 'ubuntu-latest' @@ -88,7 +97,7 @@ jobs: brew install asciidoctor - name: Install Rust - uses: actions-rs/toolchain@v1 + uses: ructions/toolchain@v2 with: toolchain: ${{ matrix.rust }} profile: minimal @@ -110,12 +119,21 @@ jobs: echo "target dir is: ${{ env.TARGET_DIR }}" - name: Build release binary - run: ${{ env.CARGO }} build --verbose --release ${{ env.TARGET_FLAGS }} + run: ${{ env.CARGO }} build --verbose --release ${{ env.TARGET_FLAGS }} - name: Strip release binary (linux and macos) if: matrix.build == 'linux' || matrix.build == 'macos' run: strip "target/${{ matrix.target }}/release/umi-transfer" + # For now revert to the old image, since the new image does not regognise the format of the compiled binary. 
+ #- name: Strip release binary (arm) + # if: matrix.build == 'linux-arm' + # run: | + # docker run --rm -v \ + # "$PWD/target:/target:Z" \ + # ghcr.io/cross-rs/arm-unknown-linux-gnueabihf:edge \ + # strip /target/arm-unknown-linux-gnueabihf/release/umi-transfer + - name: Strip release binary (arm) if: matrix.build == 'linux-arm' run: | @@ -125,11 +143,6 @@ jobs: arm-linux-gnueabihf-strip \ /target/arm-unknown-linux-gnueabihf/release/umi-transfer - - name: Set software version (other) - if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) - run: | - echo "SOFTWARE_VERSION=${{ env.CARGO_PKG_VERSION }}" >> ${GITHUB_ENV} - - name: Build archive shell: bash run: | diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 2b80b9e..10d8732 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -30,7 +30,7 @@ jobs: uses: actions/checkout@v3 - name: Install Rust - uses: actions-rs/toolchain@v1 + uses: ructions/toolchain@v2 with: toolchain: stable override: true @@ -38,7 +38,7 @@ jobs: target: wasm32-unknown-unknown - name: Run the unit and integration tests - uses: actions-rs/cargo@v1 + uses: ructions/cargo@v1 with: command: test diff --git a/Cargo.lock b/Cargo.lock index 3eb62c6..e624e93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,15 +8,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" -[[package]] -name = "aho-corasick" -version = "0.7.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" -dependencies = [ - "memchr", -] - [[package]] name = "aho-corasick" version = "1.0.2" @@ -77,9 +68,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.71" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" [[package]] name = "approx" @@ -92,9 +83,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.11" +version = "2.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" +checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" dependencies = [ "anstyle", "bstr", @@ -263,9 +254,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.3.11" +version = "4.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" +checksum = "8f644d0dac522c8b05ddc39aaaccc5b136d5dc4ff216610c5641e3be5becf56c" dependencies = [ "clap_builder", "clap_derive", @@ -274,9 +265,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.3.11" +version = "4.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" +checksum = "af410122b9778e024f9e0fb35682cc09cc3f85cad5e8d3ba8f47a9702df6e73d" dependencies = [ "anstream", "anstyle", @@ -286,14 +277,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.3.2" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", ] [[package]] @@ -531,11 +522,11 @@ dependencies = [ [[package]] name = "globset" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc" +checksum = "1391ab1f92ffcc08911957149833e682aa3fe252b9f45f966d2ef972274c97df" dependencies = [ - "aho-corasick 0.7.20", + "aho-corasick", "bstr", "fnv", "log", @@ -656,7 +647,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.2", - "rustix 0.38.3", + "rustix 0.38.4", "windows-sys 0.48.0", ] @@ -686,9 +677,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" [[package]] name = "lazy_static" @@ -889,9 +880,9 @@ dependencies = [ [[package]] name = "paste" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" [[package]] name = "petgraph" @@ -905,9 +896,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.3.3" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "767eb9f07d4a5ebcb39bbf2d452058a93c011373abf6832e24194a1c3f004794" +checksum = "edc55135a600d700580e406b4de0d59cb9ad25e344a3a091a97ded2622ec4ec6" [[package]] name = "ppv-lite86" @@ -972,18 +963,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.63" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.29" +version = 
"1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" +checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ "proc-macro2", ] @@ -1049,7 +1040,7 @@ version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575" dependencies = [ - "aho-corasick 1.0.2", + "aho-corasick", "memchr", "regex-automata", "regex-syntax", @@ -1057,20 +1048,20 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9aaecc05d5c4b5f7da074b9a0d1a0867e71fd36e7fc0482d8bcfe8e8fc56290" +checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" dependencies = [ - "aho-corasick 1.0.2", + "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rustc_version" @@ -1097,9 +1088,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.3" +version = "0.38.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5" dependencies = [ "bitflags 2.3.3", "errno", @@ -1110,15 +1101,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f" +checksum = 
"7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "ryu" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" [[package]] name = "same-file" @@ -1137,22 +1128,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.167" +version = "1.0.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7daf513456463b42aa1d94cff7e0c24d682b429f020b9afa4f5ba5c40a22b237" +checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.167" +version = "1.0.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b69b106b68bc8054f0e974e70d19984040f8a5cf9215ca82626ea4853f82c4b9" +checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", ] [[package]] @@ -1247,9 +1238,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.23" +version = "2.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" +checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" dependencies = [ "proc-macro2", "quote", @@ -1293,7 +1284,7 @@ checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.23", + "syn 2.0.26", ] [[package]] @@ -1340,9 +1331,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" [[package]] name = "unicode-segmentation" From 282db79008512ccac24ab77118fa4e17b5fe6d70 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 18 Jul 2023 15:09:07 +0200 Subject: [PATCH 64/72] Extract version from Cargo.toml with grep to have it in the filenames. --- .github/workflows/container.yml | 14 +++++--------- .github/workflows/release.yml | 6 +----- .github/workflows/testing.yml | 4 ++-- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index 784305f..db392d4 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -1,21 +1,17 @@ name: Publish container images on: - #push: - # branches: - # - dev - # paths: - # - '**.rs' + push: + branches: + - dev + paths: + - '**.rs' release: types: [published] workflow_dispatch: - push: - branches: - - BuildAction - jobs: push_to_registry: if: ( github.repository == 'SciLifeLab/umi-transfer' || github.repository == 'MatthiasZepper/umi-transfer') diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3cd8041..0882f55 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,10 +10,6 @@ on: release: types: [published] - push: - branches: - - BuildAction - jobs: # Thanks to Alex Hallam, from whose tidy-viewer release Action all compilation steps were copied (released under UNLICENSE terms) # https://github.com/alexhallam/tv @@ -166,7 +162,7 @@ jobs: - name: Create artifact from binary - #if: (github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' + if: ((github.event_name == 'push' || github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' uses: actions/upload-artifact@v3 with: name: 
${{ env.ASSET }} diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 10d8732..571a7dd 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -14,8 +14,8 @@ on: # Run if Rust code has been changed. Readme and other changes are not relevant. push: - # paths: - # - '**.rs' + paths: + - '**.rs' env: CARGO_TERM_COLOR: always From e855e1c9d6ee035cfb499e981450ed826fd8b66a Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 18 Jul 2023 15:18:29 +0200 Subject: [PATCH 65/72] Modify tarpaulin command to incluce integration tests --- .github/workflows/testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 571a7dd..db014dc 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -76,7 +76,7 @@ jobs: - name: Generate code coverage run: | - cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out Xml + cargo +nightly tarpaulin --benches --follow-exec --timeout 120 --out Xml - name: Upload to codecov.io if: github.repository == 'SciLifeLab/umi-transfer' From 5a77497106dba2967f76c0bd6bf72b7e29f1babd Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 18 Jul 2023 21:04:32 +0200 Subject: [PATCH 66/72] Readme updates to inclue binaries and Docker. 
--- .github/workflows/release.yml | 2 +- .github/workflows/testing.yml | 12 +++++----- README.md | 42 +++++++++++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0882f55..5aa6912 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -162,7 +162,7 @@ jobs: - name: Create artifact from binary - if: ((github.event_name == 'push' || github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' + if: (github.event_name == 'push' || github.event_name == 'pull-request' || github.event_name == 'workflow_dispatch' ) && github.event.ref == 'refs/heads/main' uses: actions/upload-artifact@v3 with: name: ${{ env.ASSET }} diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index db014dc..7237742 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -76,12 +76,12 @@ jobs: - name: Generate code coverage run: | - cargo +nightly tarpaulin --benches --follow-exec --timeout 120 --out Xml + cargo +nightly tarpaulin --workspace --benches --follow-exec --timeout 120 --out Xml - - name: Upload to codecov.io - if: github.repository == 'SciLifeLab/umi-transfer' - uses: codecov/codecov-action@v2 - with: - fail_ci_if_error: true + # - name: Upload to codecov.io + # if: github.repository == 'SciLifeLab/umi-transfer' + # uses: codecov/codecov-action@v2 + # with: + # fail_ci_if_error: true diff --git a/README.md b/README.md index 109b6fa..91e8c96 100644 --- a/README.md +++ b/README.md @@ -19,19 +19,51 @@ ## Background -To increase the accuracy of quantitative DNA sequencing experiments, Unique Molecular Identifiers may be used. UMIs are short sequences used to uniquely tag each molecule in a sample library and facilitate the accurate identification of read duplicates. 
They must be added during library preparation and prior to sequencing, therefore require appropriate arrangements with your sequencing provider. +To increase the accuracy of quantitative DNA sequencing experiments, Unique Molecular Identifiers may be used. UMIs are short sequences used to uniquely tag each molecule in a sample library, enabling precise identification of read duplicates. They must be added during library preparation and prior to sequencing, therefore require appropriate arrangements with your sequencing provider. Most tools capable of taking UMIs into consideration during an analysis workflow, expect the respective UMI sequence to be embedded into the read's ID. Please consult your tools' manuals regarding the exact specification. -For some some library preparation kits and sequencing adapters, the UMI sequence needs to be read together with the index from the antisense strand and thus will be output as a separate FastQ file during demultiplexing. +For some library preparation kits and sequencing adapters, the UMI sequence needs to be read together with the index from the antisense strand. Consequently, it will be output as a separate FastQ file during the demultiplexing process. -This tools can integrate those separate UMIs into the headers in an efficient manner and can also correct divergent read numbers back to the canonical `1` and `2`. +This tool efficiently integrates these separate UMIs into the headers and can also correct divergent read numbers back to the canonical `1` and `2`. ## Installation +### Binary Installation + +Binaries for `umi-transfer` are available for most platforms and can be obtained from the [Releases page on GitHub](https://github.com/SciLifeLab/umi-transfer/releases). Simply navigate to the Releases page and download the appropriate binary of a release for your operating system. Once downloaded, you can add the binary to your system's `$PATH` or place it in a directory of your choice.
+ +### Containerized execution (Docker) + +Docker provides a platform for packaging software into self-contained units called containers. Containers encapsulate all the dependencies and libraries needed to run an application, making it easy to deploy and run the software consistently across different environments. + +To use `umi-transfer` with Docker, you can _pull_ the pre-made Docker image from Docker Hub. Open a terminal or command prompt and run the following command: + +```shell +docker pull mzscilifelab/umi-transfer:latest +``` + +Once the image is downloaded, you can run `umi-transfer` within a Docker container using: + +```shell +docker run -t -v `pwd`:`pwd` -w `pwd` mzscilifelab/umi-transfer:latest umi-transfer --help +``` + +A complete command might look like the example below. The options `-t -v -w` to Docker will ensure that your local directory is mapped to and available inside the container. Everything after the image command resembles the standard command line syntax: + +```shell +docker run -t -v `pwd`:`pwd` -w `pwd` mzscilifelab/umi-transfer:latest umi-transfer external --in=read1.fq --in2=read2.fq --umi=umi.fq +``` + +Optionally, you can create an alias for the Docker part of the command to be able to use the containerized version as if it was locally installed. Add the line below to your `~/.profile`, `~/.bash_aliases`, `~/.bashrc` or `~/.zprofile` (depending on the terminal & configuration being used). 
+ +```shell +alias umi-transfer="docker run -t -v `pwd`:`pwd` -w `pwd` mzscilifelab/umi-transfer:latest umi-transfer" +``` + ### Compile from source -Given that you have [rust installed](https://www.rust-lang.org/tools/install) on your computer, download this repo and run +Given that you have [rust installed](https://www.rust-lang.org/tools/install) on your computer, download this repository and run ```shell cargo build --release @@ -44,6 +76,8 @@ That should create an executable `target/release/umi-transfer` that can be place umi-transfer 0.2.0 ``` + + ## Usage >### Performance Note From 7b1f5e08c3ca07ae102715416217794b45798534 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 31 Jul 2023 21:14:14 +0200 Subject: [PATCH 67/72] Implement exceptions to the prompts for /dev/null and FIFOs. --- src/file_io.rs | 54 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index 98c809e..c07088e 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -133,23 +133,49 @@ pub fn write_to_file( // Checks whether an output path exists. pub fn check_outputpath(path: PathBuf, force: &bool) -> Result { - // check if the path already exists - let exists = fs::metadata(&path).is_ok(); - - // return the path of it is ok to write, otherwise an error. - if exists & !force { - // force will disable prompt, but not the check. - if Confirm::with_theme(&ColorfulTheme::default()) - .with_prompt(format!("{} exists. Overwrite?", path.display())) - .interact()? + // Check if the path is "/dev/null" -> can/will be used for singletons. + if &path.to_string_lossy() == "/dev/null" { + return Ok(path); + } + + /* + fs::metadata() returns an Err() if the file does not exist (or there was an error accessing it). + map_or() is used to convert the Err to an OK variant of path, because it is safe to write to that path. 
+ + If fs::metadata(path) returns Ok(metadata), it will be inspected further: If it is a FIFO or the --force CLI flag + is active, also allow writing. Otherwise prompt and ask for confirmation. + */ + fs::metadata(&path).map_or(Ok(path.clone()), |metadata| { + // Since FIFOs are not supported on non-unix platforms, compilation would fail otherwise. + #[cfg(unix)] { - println!("File will be overwritten."); - Ok(path) - } else { - Err(anyhow!(RuntimeErrors::FileExistsError(Some(path)))) + use std::os::unix::fs::FileTypeExt; + // On unix platforms, we want to disable prompts for FIFOs for convenience reasons. + if metadata.file_type().is_fifo() || *force { + Ok(path) + } else { + prompt_overwrite(path) + } } - } else { + #[cfg(not(unix))] + { + if *force { + Ok(path) // Return Ok(path) + } else { + prompt_overwrite(path) + } + } + }) +} + +fn prompt_overwrite(path: PathBuf) -> Result { + if Confirm::with_theme(&ColorfulTheme::default()) + .with_prompt(format!("{} exists. Overwrite?", path.display())) + .interact()? + { Ok(path) + } else { + Err(anyhow!(RuntimeErrors::FileExistsError(Some(path)))) } } From eff67b2b494b83cd5efe87de33ed586993dee271 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Mon, 31 Jul 2023 21:33:46 +0200 Subject: [PATCH 68/72] Fix the 'all variants have the same postfix' warning. I appreciated the consistency, though. --- src/file_io.rs | 11 +++++------ src/umi_errors.rs | 22 +++++++++++----------- src/umi_external.rs | 4 ++-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/file_io.rs b/src/file_io.rs index c07088e..4bdba83 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -59,8 +59,7 @@ impl OutputFile { // Read input file to Reader. Automatically scans if input is compressed with file-format crate. 
pub fn read_fastq(path: &PathBuf) -> Result>> { - fs::metadata(path) - .map_err(|_e| anyhow!(RuntimeErrors::FileNotFoundError(Some(path.into()))))?; + fs::metadata(path).map_err(|_e| anyhow!(RuntimeErrors::FileNotFound(Some(path.into()))))?; let format = FileFormat::from_file(path).context("Failed to determine file format")?; let reader: ReadFile = match format { @@ -89,14 +88,14 @@ pub fn output_file(name: PathBuf) -> Result { read: std::fs::File::create(name.as_path()) .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) .map(bio::io::fastq::Writer::new) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteableError(Some(name))))?, + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, }) } else { // File has extension but not gz Ok(OutputFile::Fastq { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteableError(Some(name))))?, + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, }) } } else { @@ -104,7 +103,7 @@ pub fn output_file(name: PathBuf) -> Result { Ok(OutputFile::Fastq { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteableError(Some(name))))?, + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, }) } } @@ -175,7 +174,7 @@ fn prompt_overwrite(path: PathBuf) -> Result { { Ok(path) } else { - Err(anyhow!(RuntimeErrors::FileExistsError(Some(path)))) + Err(anyhow!(RuntimeErrors::FileExists(Some(path)))) } } diff --git a/src/umi_errors.rs b/src/umi_errors.rs index 85f4a40..3889f6f 100644 --- a/src/umi_errors.rs +++ b/src/umi_errors.rs @@ -2,39 +2,39 @@ use std::path::PathBuf; #[derive(Debug)] pub enum RuntimeErrors { - FileExistsError(Option), - FileNotFoundError(Option), - OutputNotWriteableError(Option), - ReadIDMismatchError, + FileExists(Option), + FileNotFound(Option), + OutputNotWriteable(Option), + 
ReadIDMismatch, ReadWriteError(bio::io::fastq::Record), } impl std::fmt::Display for RuntimeErrors { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::FileExistsError(None) => { + Self::FileExists(None) => { write!(f, "Output file exists, but must not be overwritten.") } - Self::FileExistsError(Some(path)) => write!( + Self::FileExists(Some(path)) => write!( f, "Output file {} exists, but must not be overwritten.", path.display() ), - Self::FileNotFoundError(None) => { + Self::FileNotFound(None) => { write!(f, "Specified file does not exist or is not readable!") } - Self::FileNotFoundError(Some(path)) => { + Self::FileNotFound(Some(path)) => { write!(f, "{} does not exist or is not readable!", path.display()) } - Self::OutputNotWriteableError(None) => { + Self::OutputNotWriteable(None) => { write!(f, "Output file is missing or not writeable.") } - Self::OutputNotWriteableError(Some(path)) => write!( + Self::OutputNotWriteable(Some(path)) => write!( f, "Output file {} is missing or not writeable.", path.display() ), - Self::ReadIDMismatchError => write!( + Self::ReadIDMismatch => write!( f, "IDs of UMI and read records mismatch. Please provide sorted files as input!" 
), diff --git a/src/umi_external.rs b/src/umi_external.rs index a68d418..5c18b93 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -151,7 +151,7 @@ pub fn run(args: OptsExternal) -> Result { read_nr, )?; } else { - return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); + return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); } if r2_rec.id().eq(ru_rec.id()) { @@ -165,7 +165,7 @@ pub fn run(args: OptsExternal) -> Result { read_nr, )?; } else { - return Err(anyhow!(RuntimeErrors::ReadIDMismatchError)); + return Err(anyhow!(RuntimeErrors::ReadIDMismatch)); } } println!("Processed {:?} records", counter); From c95b357f473f4f7e7a187b18a48a089721dbd8e8 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 1 Aug 2023 12:17:07 +0200 Subject: [PATCH 69/72] Include instructions for singletons in README.md and change version to 1.0.0 --- README.md | 22 +++++++++++++--------- src/main.rs | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 91e8c96..6fc46ab 100644 --- a/README.md +++ b/README.md @@ -73,11 +73,9 @@ That should create an executable `target/release/umi-transfer` that can be place ```shell ./target/release/umi-transfer --version -umi-transfer 0.2.0 +umi-transfer 1.0.0 ``` - - ## Usage >### Performance Note @@ -123,15 +121,23 @@ OPTIONS: ### Example +A run with just the mandatory arguments may look like this: + ```shell umi-transfer external -fz -d '_' --in 'R1.fastq' --in2 'R3.fastq' --umi 'R2.fastq' ``` +`umi-transfer` requires paired input files. To run on singletons, use the same input twice and redirect one output to `/dev/null`: + +```shell +umi-transfer external --in read1.fastq --in2 read1.fastq --umi read2.fastq --out output1.fastq --out2 /dev/null +``` + ### High Performance Guide The performance bottleneck of UMI integration is output file compression.
[Parallel Gzip](https://github.com/madler/pigz) can be used on modern multi-processor, multi-core machines to significantly outclass the single-threaded compression that ships with `umi-transfer`. -We recommend using Unix FIFOs (First In, First Out buffered pipes) to combine `umi-transfer` and `pigz`: +We recommend using Unix FIFOs (First In, First Out buffered pipes) to combine `umi-transfer` and `pigz` on GNU/Linux and MacOS operating systems: ```shell mkfifo read1.fastq @@ -168,8 +174,8 @@ prw-rw-r--. 1 alneberg ngisweden 0 Apr 13 12:46 read3.fastq We continue to create FIFOs for the output files: ```shell -$ mkfifo output1.fastq -$ mkfifo output2.fastq +mkfifo output1.fastq +mkfifo output2.fastq ``` and set-up a multi-threaded `pigz` compression process each: @@ -186,11 +192,9 @@ The argument `-p 10` specifies the number of threads that each `pigz` processes Finally, we can then run `umi-transfer` using the FIFOs like so: ```shell -umi-transfer external --force --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq +umi-transfer external --in read1.fastq --in2 read3.fastq --umi read2.fastq --out output1.fastq --out2 output2.fastq ``` -`--force` is optional and skips the prompt whether existing output files may be overwritten, which will be triggered by the prepared FIFOs. - It's good practice to remove the FIFOs after the program has finished: ```shell diff --git a/src/main.rs b/src/main.rs index f716f1e..df355a1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,7 +25,7 @@ https://github.com/SciLifeLab/umi-transfer #[derive(clap::Parser)] #[clap( - version = "0.2.0", + version = "1.0.0", author = "Written by Judit Hohenthal, Matthias Zepper & Johannes Alneberg", about = "A tool for transferring Unique Molecular Identifiers (UMIs).", long_about = "Most tools capable of using UMIs to increase the accuracy of quantitative DNA sequencing experiments expect the respective UMI sequence to be embedded into the reads' IDs. 
You can use `umi-transfer external` to retrieve UMIs from a separate FastQ file and embed them to the IDs of your paired FastQ files." From 7d99e7949a6453e4161f51e7f269d871c59e9b8b Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 1 Aug 2023 12:49:07 +0200 Subject: [PATCH 70/72] Dropping support for implicit compressed output by specifing .gz extension. This will result in a more consistent behaviour. --- Cargo.lock | 117 ++++++++++++++++---------------------------- Cargo.toml | 2 +- src/file_io.rs | 43 ++++++++-------- src/umi_external.rs | 4 +- 4 files changed, 66 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e624e93..8bf1fbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -254,9 +254,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.3.15" +version = "4.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f644d0dac522c8b05ddc39aaaccc5b136d5dc4ff216610c5641e3be5becf56c" +checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d" dependencies = [ "clap_builder", "clap_derive", @@ -265,9 +265,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.3.15" +version = "4.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af410122b9778e024f9e0fb35682cc09cc3f85cad5e8d3ba8f47a9702df6e73d" +checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1" dependencies = [ "anstream", "anstyle", @@ -284,7 +284,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.28", ] [[package]] @@ -385,9 +385,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = 
"a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] name = "encode_unicode" @@ -417,9 +417,9 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" dependencies = [ "errno-dragonfly", "libc", @@ -438,12 +438,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "1.9.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] +checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" [[package]] name = "feature-probe" @@ -522,9 +519,9 @@ dependencies = [ [[package]] name = "globset" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1391ab1f92ffcc08911957149833e682aa3fe252b9f45f966d2ef972274c97df" +checksum = "aca8bbd8e0707c1887a8bbb7e6b40e228f251ff5d62c8220a4a7a53c73aff006" dependencies = [ "aho-corasick", "bstr", @@ -629,17 +626,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi 0.3.2", - "libc", - "windows-sys 0.48.0", -] - [[package]] name = "is-terminal" version = "0.4.9" @@ -647,7 +633,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.2", - "rustix 0.38.4", + "rustix", "windows-sys 0.48.0", ] @@ -701,15 +687,9 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = 
"linux-raw-sys" -version = "0.3.8" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" - -[[package]] -name = "linux-raw-sys" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" +checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" [[package]] name = "log" @@ -840,9 +820,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", "libm", @@ -896,9 +876,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc55135a600d700580e406b4de0d59cb9ad25e344a3a091a97ded2622ec4ec6" +checksum = "f32154ba0af3a075eefa1eda8bb414ee928f62303a54ea85b8d6638ff1a6ee9e" [[package]] name = "ppv-lite86" @@ -972,9 +952,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" dependencies = [ "proc-macro2", ] @@ -1048,9 +1028,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310" +checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" dependencies = [ "aho-corasick", "memchr", @@ -1072,20 +1052,6 
@@ dependencies = [ "semver", ] -[[package]] -name = "rustix" -version = "0.37.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.3.8", - "windows-sys 0.48.0", -] - [[package]] name = "rustix" version = "0.38.4" @@ -1095,7 +1061,7 @@ dependencies = [ "bitflags 2.3.3", "errno", "libc", - "linux-raw-sys 0.4.3", + "linux-raw-sys", "windows-sys 0.48.0", ] @@ -1128,22 +1094,22 @@ checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" [[package]] name = "serde" -version = "1.0.171" +version = "1.0.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" +checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.171" +version = "1.0.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" +checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.28", ] [[package]] @@ -1238,9 +1204,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.26" +version = "2.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" dependencies = [ "proc-macro2", "quote", @@ -1249,15 +1215,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" +checksum = 
"5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" dependencies = [ - "autocfg", "cfg-if", "fastrand", "redox_syscall", - "rustix 0.37.23", + "rustix", "windows-sys 0.48.0", ] @@ -1269,22 +1234,22 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.28", ] [[package]] @@ -1311,7 +1276,7 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "umi-transfer" -version = "0.2.0" +version = "1.0.0" dependencies = [ "anyhow", "assert_cmd", diff --git a/Cargo.toml b/Cargo.toml index 9805db3..bfeb640 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "umi-transfer" -version = "0.2.0" +version = "1.0.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/file_io.rs b/src/file_io.rs index 4bdba83..3533ff1 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -80,26 +80,15 @@ pub fn read_fastq(path: &PathBuf) -> Result Result { - if let Some(extension) = name.extension() { - if extension == "gz" { - // File has gz extension, which has been enforced by check_outputpath() if -z was provided. 
- Ok(OutputFile::Gzip { - read: std::fs::File::create(name.as_path()) - .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) - .map(bio::io::fastq::Writer::new) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, - }) - } else { - // File has extension but not gz - Ok(OutputFile::Fastq { - read: std::fs::File::create(name.as_path()) - .map(bio::io::fastq::Writer::new) - .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, - }) - } +pub fn output_file(name: PathBuf, compress: &bool) -> Result { + if *compress { + Ok(OutputFile::Gzip { + read: std::fs::File::create(name.as_path()) + .map(|w| flate2::write::GzEncoder::new(w, flate2::Compression::default())) + .map(bio::io::fastq::Writer::new) + .map_err(|_e| anyhow!(RuntimeErrors::OutputNotWriteable(Some(name))))?, + }) } else { - //file has no extension. Assume plain-text. Ok(OutputFile::Fastq { read: std::fs::File::create(name.as_path()) .map(bio::io::fastq::Writer::new) @@ -132,14 +121,14 @@ pub fn write_to_file( // Checks whether an output path exists. pub fn check_outputpath(path: PathBuf, force: &bool) -> Result { - // Check if the path is "/dev/null" -> can/will be used for singletons. + // Skip overwrite prompt for "/dev/null" -> can/will be used for singletons. if &path.to_string_lossy() == "/dev/null" { return Ok(path); } /* fs::metadata() returns an Err() if the file does not exist (or there was an error accessing it). - map_or() is used to convert the Err to an OK variant of path, because it is safe to write to that path. + map_or() is used to convert the Err to an OK variant of path, because it is safe to write to that new path. If fs::metadata(path) returns Ok(metadata), it will be inspected further: If it is a FIFO or the --force CLI flag is active, also allow writing. Otherwise prompt and ask for confirmation. @@ -180,6 +169,18 @@ fn prompt_overwrite(path: PathBuf) -> Result { // Checks whether an output path exists. 
pub fn rectify_extension(mut path: PathBuf, compress: &bool) -> Result { + // Optional code, since compilation would fail on platforms that don't support FIFOs (Windows etc.) + #[cfg(unix)] + { + // output path exists: Do not change output for FIFOs on unix platforms. + if let Some(metadata) = fs::metadata(&path).ok() { + use std::os::unix::fs::FileTypeExt; + if metadata.file_type().is_fifo() { + return Ok(path); + } + } + } + // handle the compression and adapt file extension if necessary. if let Some(extension) = path.extension().and_then(|e| e.to_str()) { match (*compress, extension.ends_with("gz")) { diff --git a/src/umi_external.rs b/src/umi_external.rs index 5c18b93..cbeee4c 100644 --- a/src/umi_external.rs +++ b/src/umi_external.rs @@ -123,8 +123,8 @@ pub fn run(args: OptsExternal) -> Result { println!("Output 1 will be saved to: {}", output1.to_string_lossy()); println!("Output 2 will be saved to: {}", output2.to_string_lossy()); - let mut write_file_r1 = file_io::output_file(output1)?; - let mut write_file_r2 = file_io::output_file(output2)?; + let mut write_file_r1 = file_io::output_file(output1, &args.gzip)?; + let mut write_file_r2 = file_io::output_file(output2, &args.gzip)?; // Record counter let mut counter: i32 = 0; From da8cff3ae3a6ab5105316ec9bdf646d9074a9466 Mon Sep 17 00:00:00 2001 From: Matthias Zepper Date: Tue, 1 Aug 2023 16:58:38 +0200 Subject: [PATCH 71/72] Match on Ok() instead of Some() in rectify_extension() to avoid going from Result to Option and back. --- src/file_io.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/file_io.rs b/src/file_io.rs index 3533ff1..8eda587 100644 --- a/src/file_io.rs +++ b/src/file_io.rs @@ -173,7 +173,7 @@ pub fn rectify_extension(mut path: PathBuf, compress: &bool) -> Result #[cfg(unix)] { // output path exists: Do not change output for FIFOs on unix platforms. 
- if let Some(metadata) = fs::metadata(&path).ok() { + if let Ok(metadata) = fs::metadata(&path) { use std::os::unix::fs::FileTypeExt; if metadata.file_type().is_fifo() { return Ok(path); From 3213396901980b71f92169acd0f5438769b1590f Mon Sep 17 00:00:00 2001 From: Johannes Alneberg Date: Mon, 14 Aug 2023 15:21:11 +0200 Subject: [PATCH 72/72] Typofix in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6fc46ab..e291759 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ To increase the accuracy of quantitative DNA sequencing experiments, Unique Mole Most tools capable of taking UMIs into consideration during an analysis workflow, expect the respective UMI sequence to be embedded into the read's ID. Please consult your tools' manuals regarding the exact specification. -For some some library preparation kits and sequencing adapters, the UMI sequence needs to be read together with the index from the antisense strand. Consequently, it will be output as a separate FastQ file during the demultiplexing process. +For some library preparation kits and sequencing adapters, the UMI sequence needs to be read together with the index from the antisense strand. Consequently, it will be output as a separate FastQ file during the demultiplexing process. This tool efficiently integrates these separate UMIs into the headers and can also correct divergent read numbers back to the canonical `1` and `2`.