From f3fd1108b63b2bfcdcb90fb7917ac099a46c7851 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Fri, 5 Apr 2024 10:04:55 -0700 Subject: [PATCH 01/52] Start extracting cellranger-related args. --- enclone_args/src/load_gex_core.rs | 17 ++++-- enclone_args/src/load_gex_util.rs | 83 +++++++++----------------- enclone_args/src/proc_args3.rs | 12 ++-- enclone_args/src/read_json.rs | 2 +- enclone_core/src/defs.rs | 8 ++- enclone_ranger/src/main_enclone.rs | 3 +- enclone_stuff/src/populate_features.rs | 2 +- enclone_stuff/src/start.rs | 2 +- 8 files changed, 56 insertions(+), 73 deletions(-) diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index ca6a80fdf..0a9608d52 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -141,15 +141,22 @@ pub fn load_gex( // Find files. - let pca_file = find_pca_file(ctl, &outs, &analysis, pathlist); - let json_metrics_file = find_json_metrics_file(ctl, &outs, &analysis, pathlist); - let feature_metrics_file = find_feature_metrics_file(ctl, &outs, &analysis, pathlist); - let metrics_file = find_metrics_file(ctl, &outs, &analysis, pathlist); - let cluster_file = find_cluster_file(ctl, &outs, &analysis, pathlist); + let pca_file = find_pca_file(&analysis, pathlist); + let cluster_file = find_cluster_file(&analysis, pathlist); + + let (json_metrics_file, feature_metrics_file, metrics_file) = if !ctl.gen_opt.cellranger.cellranger {( + find_json_metrics_file(&analysis, pathlist), + find_feature_metrics_file(&analysis, pathlist), + find_metrics_file(&outs, pathlist) + )} else { + Default::default() + }; // Proceed. for f in &[pca_file.clone(), cluster_file.clone()] { + + if !path_exists(f) { r.error = format!( "\nThe file\n{f}\ndoes not exist. \ diff --git a/enclone_args/src/load_gex_util.rs b/enclone_args/src/load_gex_util.rs index bec9577c5..d94fdbd38 100644 --- a/enclone_args/src/load_gex_util.rs +++ b/enclone_args/src/load_gex_util.rs @@ -1,15 +1,9 @@ // Copyright (c) 2022 10X Genomics, Inc. All rights reserved. -use enclone_core::defs::EncloneControl; use io_utils::{dir_list, path_exists}; use vector_utils::VecUtils; -pub fn find_pca_file( - _ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_pca_file(analysis: &[String], pathlist: &mut Vec) -> String { let mut pca_file = String::new(); for x in analysis { pca_file = format!("{x}/pca/10_components/projection.csv"); @@ -26,73 +20,50 @@ pub fn find_pca_file( pca_file } -pub fn find_json_metrics_file( - ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_json_metrics_file(analysis: &[String], pathlist: &mut Vec) -> String { let mut json_metrics_file = String::new(); - if !ctl.gen_opt.cellranger { - for x in analysis { - let f = format!("{x}/metrics_summary_json.json"); - if path_exists(&f) { - json_metrics_file = f.clone(); - pathlist.push(f); - break; - } + for x in analysis { + let f = format!("{x}/metrics_summary_json.json"); + if path_exists(&f) { + json_metrics_file = f.clone(); + pathlist.push(f); + break; } } + json_metrics_file } -pub fn find_feature_metrics_file( - ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_feature_metrics_file(analysis: &[String], pathlist: &mut Vec) -> String { let mut feature_metrics_file = String::new(); - if !ctl.gen_opt.cellranger { - for x in analysis { - let f = format!("{x}/per_feature_metrics.csv"); - if path_exists(&f) { - feature_metrics_file = f.clone(); - pathlist.push(f); - break; - } + for x in analysis { + let f = format!("{x}/per_feature_metrics.csv"); + if path_exists(&f) { + feature_metrics_file = f.clone(); + pathlist.push(f); + break; } } + feature_metrics_file } -pub fn find_metrics_file( - ctl: &EncloneControl, - outs: &str, - _analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_metrics_file(outs: &str, pathlist: &mut Vec) -> String { let mut metrics_file = String::new(); - if !ctl.gen_opt.cellranger { - let summary_dir = format!("{outs}/../multi_web_summary_json/metrics_summary_csv"); - if path_exists(&summary_dir) { - let list = dir_list(&summary_dir); - if list.solo() { - let path = format!("{summary_dir}/{}", list[0]); - pathlist.push(path.clone()); - metrics_file = path; - } + let summary_dir = format!("{outs}/../multi_web_summary_json/metrics_summary_csv"); + if path_exists(&summary_dir) { + let list = dir_list(&summary_dir); + if list.solo() { + let path = format!("{summary_dir}/{}", list[0]); + pathlist.push(path.clone()); + metrics_file = path; } } + metrics_file } -pub fn find_cluster_file( - _ctl: &EncloneControl, - _outs: &str, - analysis: &[String], - pathlist: &mut Vec, -) -> String { +pub fn find_cluster_file(analysis: &[String], pathlist: &mut Vec) -> String { let mut cluster_file = String::new(); for x in analysis { cluster_file = format!("{x}/clustering/graphclust/clusters.csv"); diff --git a/enclone_args/src/proc_args3.rs b/enclone_args/src/proc_args3.rs index 12aeb1fb6..aaba3301a 100644 --- a/enclone_args/src/proc_args3.rs +++ b/enclone_args/src/proc_args3.rs @@ -314,14 +314,14 @@ pub fn proc_xcr( } let val = expand_integer_ranges(val); let val = expand_analysis_sets(&val)?; - let donor_groups = if ctl.gen_opt.cellranger { + let donor_groups = if ctl.gen_opt.cellranger.cellranger { vec![&val[..]] } else { val.split(';').collect::>() }; let mut gex2 = expand_integer_ranges(gex); gex2 = expand_analysis_sets(&gex2)?; - let donor_groups_gex = if ctl.gen_opt.cellranger { + let donor_groups_gex = if ctl.gen_opt.cellranger.cellranger { vec![&gex2[..]] } else { gex2.split(';').collect::>() @@ -353,14 +353,14 @@ pub fn proc_xcr( } for (id, d) in donor_groups.iter().enumerate() { - let origin_groups = if ctl.gen_opt.cellranger { + let origin_groups = if ctl.gen_opt.cellranger.cellranger { vec![&d[..]] } else { (*d).split(':').collect::>() }; let mut origin_groups_gex = Vec::<&str>::new(); if have_gex { - if ctl.gen_opt.cellranger { + if ctl.gen_opt.cellranger.cellranger { origin_groups_gex = vec![donor_groups_gex[id]]; } else { origin_groups_gex = donor_groups_gex[id].split(':').collect::>(); @@ -389,7 +389,7 @@ pub fn proc_xcr( } } for (is, s) in origin_groups.iter().enumerate() { - let mut datasets = if ctl.gen_opt.cellranger { + let mut datasets = if ctl.gen_opt.cellranger.cellranger { vec![&s[..]] } else { (*s).split(',').collect::>() @@ -402,7 +402,7 @@ pub fn proc_xcr( let datasets_gex: Vec<&str>; let mut datasets_bc = Vec::<&str>::new(); if have_gex { - if ctl.gen_opt.cellranger { + if ctl.gen_opt.cellranger.cellranger { datasets_gex = vec![origin_groups_gex[is]]; } else { datasets_gex = origin_groups_gex[is].split(',').collect::>(); diff --git a/enclone_args/src/read_json.rs b/enclone_args/src/read_json.rs index 3fb875359..01b7dc1f1 100644 --- a/enclone_args/src/read_json.rs +++ b/enclone_args/src/read_json.rs @@ -697,7 +697,7 @@ pub fn parse_json_annotations_files( refdata: &RefData, ) -> Result { // Note: only tracking truncated seq and quals initially - let ann = if !ctl.gen_opt.cellranger { + let ann = if !ctl.gen_opt.cellranger.cellranger { "all_contig_annotations.json" } else { "contig_annotations.json" diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f3833704c..a7fb42bde 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -91,10 +91,17 @@ impl OriginInfo { } } +/// The subset of configuration options used by Cellranger. +#[derive(Default, PartialEq)] +pub struct CellrangerOpt { + pub cellranger: bool, +} + // Miscellaneous general options. #[derive(Default, PartialEq)] pub struct GeneralOpt { + pub cellranger: CellrangerOpt, pub pre: Vec, pub indels: bool, pub reannotate: bool, @@ -146,7 +153,6 @@ pub struct GeneralOpt { pub required_three_chain_clonotypes: Option, pub required_four_chain_clonotypes: Option, pub required_datasets: Option, - pub cellranger: bool, pub summary: bool, pub summary_clean: bool, pub summary_csv: bool, diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 0a708905f..47750559a 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -82,11 +82,10 @@ pub fn main_enclone_ranger(args: &[String]) -> Result<(), String> { pub fn main_enclone_setup_ranger(args: &[String]) -> Result { let tall = Instant::now(); - // Set up stuff, read args, etc. let mut ctl = EncloneControl::default(); - ctl.gen_opt.cellranger = true; + ctl.gen_opt.cellranger.cellranger = true; for arg in args.iter().skip(1) { if arg.starts_with("PRE=") { ctl.gen_opt.pre.clear(); diff --git a/enclone_stuff/src/populate_features.rs b/enclone_stuff/src/populate_features.rs index 4a2176fda..559954723 100644 --- a/enclone_stuff/src/populate_features.rs +++ b/enclone_stuff/src/populate_features.rs @@ -139,7 +139,7 @@ pub fn populate_features( // the user supplied the wrong reference, so there is no value in criticizing the reference // in that case. - if !log.is_empty() && !ctl.gen_opt.cellranger && !ctl.gen_opt.accept_broken { + if !log.is_empty() && !ctl.gen_opt.cellranger.cellranger && !ctl.gen_opt.accept_broken { let mut log = Vec::::new(); fwriteln!( log, diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs index ee715a803..c5d6b4570 100644 --- a/enclone_stuff/src/start.rs +++ b/enclone_stuff/src/start.rs @@ -475,7 +475,7 @@ pub fn main_enclone_start( for (clone, d) in ex.clones.iter().take(ex.ncells()).zip(to_delete.iter_mut()) { let li = clone[0].dataset_index; let bc = &clone[0].barcode; - if ctl.gen_opt.cellranger { + if ctl.gen_opt.cellranger.cellranger { if gex_cells_specified[li] && !bin_member(&gex_cells[li], bc) { *d = true; fate[li].insert(bc.clone(), BarcodeFate::NotGexCell); From 7a216f2205378eed0a45e29808d8243302e42c72 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 19:08:13 -0700 Subject: [PATCH 02/52] Start extracting the CR subset of args into a type. --- Cargo.lock | 1 + enclone_args/src/load_gex_core.rs | 2 +- enclone_args/src/proc_args.rs | 2 - enclone_args/src/proc_args3.rs | 12 +-- enclone_args/src/proc_args_post.rs | 2 +- enclone_args/src/read_json.rs | 2 +- enclone_core/src/defs.rs | 109 +++++++++++++++++++------ enclone_ranger/Cargo.toml | 1 + enclone_ranger/src/main_enclone.rs | 23 ++++-- enclone_stuff/src/populate_features.rs | 2 +- enclone_stuff/src/start.rs | 10 +-- 11 files changed, 117 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cc73fa959..5eee85578 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -514,6 +514,7 @@ dependencies = [ name = "enclone_ranger" version = "0.5.219" dependencies = [ + "anyhow", "enclone", "enclone_args", "enclone_core", diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index 0a9608d52..995bce271 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -144,7 +144,7 @@ pub fn load_gex( let pca_file = find_pca_file(&analysis, pathlist); let cluster_file = find_cluster_file(&analysis, pathlist); - let (json_metrics_file, feature_metrics_file, metrics_file) = if !ctl.gen_opt.cellranger.cellranger {( + let (json_metrics_file, feature_metrics_file, metrics_file) = if !ctl.cr_opt.cellranger {( find_json_metrics_file(&analysis, pathlist), find_feature_metrics_file(&analysis, pathlist), find_metrics_file(&outs, pathlist) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index fadd910c8..4f9b6d780 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -601,7 +601,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let set_string_writeable = [ ("BINARY", &mut ctl.gen_opt.binary), - ("DONOR_REF_FILE", &mut ctl.gen_opt.dref_file), ("FATE_FILE", &mut ctl.gen_opt.fate_file), ("HONEY_OUT", &mut ctl.plot_opt.honey_out), ("PROTO", &mut ctl.gen_opt.proto), @@ -641,7 +640,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String // Define arguments that do nothing (because already parsed), and which have no "= value" part. let set_nothing_simple = [ - "CELLRANGER", "DUMP_INTERNAL_IDS", "EVIL_EYE", "LONG_HELP", diff --git a/enclone_args/src/proc_args3.rs b/enclone_args/src/proc_args3.rs index aaba3301a..530d24d3c 100644 --- a/enclone_args/src/proc_args3.rs +++ b/enclone_args/src/proc_args3.rs @@ -314,14 +314,14 @@ pub fn proc_xcr( } let val = expand_integer_ranges(val); let val = expand_analysis_sets(&val)?; - let donor_groups = if ctl.gen_opt.cellranger.cellranger { + let donor_groups = if ctl.cr_opt.cellranger { vec![&val[..]] } else { val.split(';').collect::>() }; let mut gex2 = expand_integer_ranges(gex); gex2 = expand_analysis_sets(&gex2)?; - let donor_groups_gex = if ctl.gen_opt.cellranger.cellranger { + let donor_groups_gex = if ctl.cr_opt.cellranger { vec![&gex2[..]] } else { gex2.split(';').collect::>() @@ -353,14 +353,14 @@ pub fn proc_xcr( } for (id, d) in donor_groups.iter().enumerate() { - let origin_groups = if ctl.gen_opt.cellranger.cellranger { + let origin_groups = if ctl.cr_opt.cellranger { vec![&d[..]] } else { (*d).split(':').collect::>() }; let mut origin_groups_gex = Vec::<&str>::new(); if have_gex { - if ctl.gen_opt.cellranger.cellranger { + if ctl.cr_opt.cellranger { origin_groups_gex = vec![donor_groups_gex[id]]; } else { origin_groups_gex = donor_groups_gex[id].split(':').collect::>(); @@ -389,7 +389,7 @@ pub fn proc_xcr( } } for (is, s) in origin_groups.iter().enumerate() { - let mut datasets = if ctl.gen_opt.cellranger.cellranger { + let mut datasets = if ctl.cr_opt.cellranger { vec![&s[..]] } else { (*s).split(',').collect::>() @@ -402,7 +402,7 @@ pub fn proc_xcr( let datasets_gex: Vec<&str>; let mut datasets_bc = Vec::<&str>::new(); if have_gex { - if ctl.gen_opt.cellranger.cellranger { + if ctl.cr_opt.cellranger { datasets_gex = vec![origin_groups_gex[is]]; } else { datasets_gex = origin_groups_gex[is].split(',').collect::>(); diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 714037f7d..2385858ef 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -246,7 +246,7 @@ pub fn proc_args_post( &mut ctl.plot_opt.plot_file, &mut ctl.gen_opt.fasta_filename, &mut ctl.gen_opt.fasta_aa_filename, - &mut ctl.gen_opt.dref_file, + &mut ctl.cr_opt.dref_file, &mut ctl.parseable_opt.pout, ]; for f in &mut files { diff --git a/enclone_args/src/read_json.rs b/enclone_args/src/read_json.rs index 01b7dc1f1..7e43c25fe 100644 --- a/enclone_args/src/read_json.rs +++ b/enclone_args/src/read_json.rs @@ -697,7 +697,7 @@ pub fn parse_json_annotations_files( refdata: &RefData, ) -> Result { // Note: only tracking truncated seq and quals initially - let ann = if !ctl.gen_opt.cellranger.cellranger { + let ann = if !ctl.cr_opt.cellranger { "all_contig_annotations.json" } else { "contig_annotations.json" diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index a7fb42bde..1b5f73a40 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -2,10 +2,12 @@ use crate::cell_color::CellColor; use crate::linear_condition::LinearCondition; +use anyhow::Result; use debruijn::dna_string::DnaString; use evalexpr::Node; use hdf5::Dataset; +use itertools::Itertools; use regex::Regex; use std::cmp::max; use std::collections::HashMap; @@ -94,14 +96,50 @@ impl OriginInfo { /// The subset of configuration options used by Cellranger. #[derive(Default, PartialEq)] pub struct CellrangerOpt { + /// True if enclone is being called from Cellranger. pub cellranger: bool, + /// Path to donor reference file. + pub dref_file: String, +} + +impl CellrangerOpt { + /// Process command line arguments relevant to cellranger. + /// Any unused arguments are returned for further processing. + pub fn from_args(args: Vec) -> Result<(Self, Vec)> { + let mut cr_opts = Self::default(); + let mut unused_args = Vec::new(); + for arg in args { + let mut pieces = arg.split('='); + let arg_name = pieces.next().unwrap(); + match arg_name { + "CELLRANGER" => { + cr_opts.cellranger = true; + } + "DONOR_REF_FILE" => { + cr_opts.dref_file = pieces.exactly_one().expect("FIXME").to_string(); + } + _ => { + // FIXME + unused_args.push(arg.clone()); + } + } + } + Ok((cr_opts, unused_args)) + } + + /// Validate parsed options. + pub fn validate(&self) -> Result<()> { + if !self.dref_file.is_empty() { + // TODO: test writability + } + Ok(()) + } } // Miscellaneous general options. #[derive(Default, PartialEq)] pub struct GeneralOpt { - pub cellranger: CellrangerOpt, pub pre: Vec, pub indels: bool, pub reannotate: bool, @@ -139,7 +177,6 @@ pub struct GeneralOpt { pub ext: String, pub extc: HashMap<(String, String), String>, pub extn: HashMap, - pub dref_file: String, pub mouse: bool, pub refname: String, pub noprint: bool, @@ -525,28 +562,52 @@ pub struct ParseableOpt { #[derive(Default)] pub struct EncloneControl { - pub start_time: Option, // enclone start time - pub gen_opt: GeneralOpt, // miscellaneous general options - pub plot_opt: PlotOpt, // plot options - pub pretty: bool, // use escape characters to enhance view - pub nogray: bool, // don't gray in per cell lines - pub silent: bool, // turn off extra logging - pub force: bool, // make joins even if redundant - pub debug_table_printing: bool, // turn on debugging for table printing - pub merge_all_impropers: bool, // merge all improper exact subclonotypes - pub heur: ClonotypeHeuristics, // algorithmic heuristics - pub origin_info: OriginInfo, // origin (sample) info - pub allele_alg_opt: AlleleAlgOpt, // algorithmic options for allele finding - pub allele_print_opt: AllelePrintOpt, // print options for allele finding - pub join_alg_opt: JoinAlgOpt, // algorithmic options for join - pub join_print_opt: JoinPrintOpt, // printing options for join operations - pub clono_filt_opt_def: ClonoFiltOptDefault, // default filtering options for clonotypes - pub clono_filt_opt: ClonoFiltOpt, // filtering options for clonotypes - pub clono_print_opt: ClonoPrintOpt, // printing options for clonotypes - pub clono_group_opt: ClonoGroupOpt, // grouping options for clonotypes - pub parseable_opt: ParseableOpt, // parseable output options - pub pathlist: Vec, // list of input files - pub last_modified: Vec, // last modified for pathlist + /// enclone start time + pub start_time: Option, + /// miscellaneous general options + pub gen_opt: GeneralOpt, + /// Config options used by cellranger. + pub cr_opt: CellrangerOpt, + /// plot options + pub plot_opt: PlotOpt, + /// use escape characters to enhance view + pub pretty: bool, + /// don't gray in per cell lines + pub nogray: bool, + /// turn off extra logging + pub silent: bool, + /// make joins even if redundant + pub force: bool, + /// turn on debugging for table printing + pub debug_table_printing: bool, + /// merge all improper exact subclonotypes + pub merge_all_impropers: bool, + /// algorithmic heuristics + pub heur: ClonotypeHeuristics, + /// origin (sample) info + pub origin_info: OriginInfo, + /// algorithmic options for allele finding + pub allele_alg_opt: AlleleAlgOpt, + /// print options for allele finding + pub allele_print_opt: AllelePrintOpt, + /// algorithmic options for join + pub join_alg_opt: JoinAlgOpt, + /// printing options for join operations + pub join_print_opt: JoinPrintOpt, + /// default filtering options for clonotypes + pub clono_filt_opt_def: ClonoFiltOptDefault, + /// filtering options for clonotypes + pub clono_filt_opt: ClonoFiltOpt, + /// printing options for clonotypes + pub clono_print_opt: ClonoPrintOpt, + /// grouping options for clonotypes + pub clono_group_opt: ClonoGroupOpt, + /// parseable output options + pub parseable_opt: ParseableOpt, + /// list of input files + pub pathlist: Vec, + /// last modified for pathlist + pub last_modified: Vec, } // Set up data structure to track clonotype data. A TigData is for one contig; diff --git a/enclone_ranger/Cargo.toml b/enclone_ranger/Cargo.toml index 569060864..c3be101bf 100644 --- a/enclone_ranger/Cargo.toml +++ b/enclone_ranger/Cargo.toml @@ -20,6 +20,7 @@ publish = false # in the root of the enclone repo. [dependencies] +anyhow.workspace = true enclone_core = { path = "../enclone_core" } enclone_process = { path = "../enclone_process" } enclone_stuff = { path = "../enclone_stuff" } diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 47750559a..d35760ef7 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -5,10 +5,11 @@ use self::refx::{make_vdj_ref_data_core, RefData}; use crate::USING_PAGER; +use anyhow::anyhow; use enclone::innate::species; use enclone_args::load_gex::get_gex_info; use enclone_args::proc_args::proc_args; -use enclone_core::defs::EncloneControl; +use enclone_core::defs::{CellrangerOpt, EncloneControl}; use enclone_core::enclone_structs::EncloneSetup; use enclone_process::process_clonotypes::{process_clonotypes, OrbitProcessor}; use enclone_stuff::start::main_enclone_start; @@ -21,7 +22,7 @@ use std::{ use string_utils::TextUtils; use vdj_ann::refx; -pub fn main_enclone_ranger(args: &[String]) -> Result<(), String> { +pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { const REQUIRED_ARGS: [&str; 8] = [ "CELLRANGER", "DONOR_REF_FILE", @@ -75,17 +76,23 @@ pub fn main_enclone_ranger(args: &[String]) -> Result<(), String> { } } let setup = main_enclone_setup_ranger(args)?; - let (exacts, fate) = main_enclone_start(&setup)?; + let (exacts, fate) = main_enclone_start(&setup).map_err(|e| anyhow!(e))?; let gex_readers = setup.create_gex_readers(); process_clonotypes::<(), ()>(&setup, &exacts, &gex_readers, &fate, NoOpProc) + .map_err(|e| anyhow!(e))?; + Ok(()) } -pub fn main_enclone_setup_ranger(args: &[String]) -> Result { +pub fn main_enclone_setup_ranger(args: Vec) -> anyhow::Result { let tall = Instant::now(); // Set up stuff, read args, etc. + let (cr_opt, args) = CellrangerOpt::from_args(args)?; + + let mut ctl = EncloneControl { + cr_opt, + ..Default::default() + }; - let mut ctl = EncloneControl::default(); - ctl.gen_opt.cellranger.cellranger = true; for arg in args.iter().skip(1) { if arg.starts_with("PRE=") { ctl.gen_opt.pre.clear(); @@ -100,11 +107,11 @@ pub fn main_enclone_setup_ranger(args: &[String]) -> Result::new(); fwriteln!( log, diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs index c5d6b4570..c8f8774a6 100644 --- a/enclone_stuff/src/start.rs +++ b/enclone_stuff/src/start.rs @@ -286,12 +286,12 @@ pub fn main_enclone_start( alt_refs = find_alleles(refdata, ctl, &exact_clonotypes); } - if !ctl.gen_opt.dref_file.is_empty() { - let f = File::create(&ctl.gen_opt.dref_file); + if !ctl.cr_opt.dref_file.is_empty() { + let f = File::create(&ctl.cr_opt.dref_file); if f.is_err() { eprintln!( - "\nError trying to write ctl.gen_opt.dref_file = {}.", - ctl.gen_opt.dref_file + "\nError trying to write ctl.cr_opt.dref_file = {}.", + ctl.cr_opt.dref_file ); } let mut f = BufWriter::new(f.unwrap()); @@ -475,7 +475,7 @@ pub fn main_enclone_start( for (clone, d) in ex.clones.iter().take(ex.ncells()).zip(to_delete.iter_mut()) { let li = clone[0].dataset_index; let bc = &clone[0].barcode; - if ctl.gen_opt.cellranger.cellranger { + if ctl.cr_opt.cellranger { if gex_cells_specified[li] && !bin_member(&gex_cells[li], bc) { *d = true; fate[li].insert(bc.clone(), BarcodeFate::NotGexCell); From e7b2789fcdf021160811305438140317e30b7dfa Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 19:26:42 -0700 Subject: [PATCH 03/52] Remove unused bug reports config. --- enclone_core/src/defs.rs | 3 ++- enclone_ranger/src/main_enclone.rs | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 1b5f73a40..53b99b474 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -111,12 +111,13 @@ impl CellrangerOpt { for arg in args { let mut pieces = arg.split('='); let arg_name = pieces.next().unwrap(); + let get_val = || pieces.exactly_one().expect("FIXME").to_string(); match arg_name { "CELLRANGER" => { cr_opts.cellranger = true; } "DONOR_REF_FILE" => { - cr_opts.dref_file = pieces.exactly_one().expect("FIXME").to_string(); + cr_opts.dref_file = get_val(); } _ => { // FIXME diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index d35760ef7..3223e24e6 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -24,8 +24,8 @@ use vdj_ann::refx; pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { const REQUIRED_ARGS: [&str; 8] = [ - "CELLRANGER", - "DONOR_REF_FILE", + "CELLRANGER", // done + "DONOR_REF_FILE", // done "MAX_CORES", "NOPAGER", "NOPRINT", From 59629adcfc802943c34426b8f100fccb017e3412 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 20:35:33 -0700 Subject: [PATCH 04/52] Move the nopager config option. --- enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 6 +++++- enclone_ranger/src/main_enclone.rs | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 4f9b6d780..c2aeaa3c9 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -649,7 +649,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "NALL", "NALL_CELL", "NALL_GEX", - "NO_KILL", "NOPAGER", "NOPRETTY", "PLAIN", diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 53b99b474..91466de86 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -100,6 +100,8 @@ pub struct CellrangerOpt { pub cellranger: bool, /// Path to donor reference file. pub dref_file: String, + /// If true, do not use output paging. + pub nopager: bool, } impl CellrangerOpt { @@ -116,6 +118,9 @@ impl CellrangerOpt { "CELLRANGER" => { cr_opts.cellranger = true; } + "NOPAGER" => { + cr_opts.nopager = true; + } "DONOR_REF_FILE" => { cr_opts.dref_file = get_val(); } @@ -238,7 +243,6 @@ pub struct GeneralOpt { pub subset_json: String, pub fold_headers: bool, pub no_uncap_sim: bool, - pub nopager: bool, pub info: Option, pub info_fields: Vec, pub info_data: HashMap>, diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 3223e24e6..fa8f86bc3 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -104,7 +104,7 @@ pub fn main_enclone_setup_ranger(args: Vec) -> anyhow::Result Date: Sat, 6 Apr 2024 20:45:33 -0700 Subject: [PATCH 05/52] Remove dead NOPRETTY arg and fix up NOPAGER. --- enclone/src/UNDOC_OPTIONS | 1 - enclone/src/info.rs | 4 ---- enclone_args/src/proc_args.rs | 2 -- enclone_ranger/src/main_enclone.rs | 4 +--- 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 9f4489ebc..01fd44fb3 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -67,7 +67,6 @@ Other optional arguments: 2. You want to see the effect of changed annotation code. - NPLAIN: reverses PLAIN - INDELS: search for and list CDR3s from clonotypes with possible SHM indels (exploratory) -- NOPRETTY: turn off pretty trace entirely - HEAVY_CHAIN_REUSE: look for instances of heavy chain reuse - BINARY=filename: generate binary output file - PROTO=filename: generate proto output file diff --git a/enclone/src/info.rs b/enclone/src/info.rs index 87b80684d..f9cc03470 100644 --- a/enclone/src/info.rs +++ b/enclone/src/info.rs @@ -222,10 +222,6 @@ pub fn build_info( } else { // maybe can't happen vs.push(rt.clone()); - // At one point there was a bug in which the following line was missing. - // This caused a traceback on "enclone 123085 RE". It is interesting because - // the traceback did not get back to the main program, even with - // "enclone 123085 RE NOPRETTY". vs_notes.push(String::new()); vsnx = String::new(); } diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index c2aeaa3c9..3040929c0 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -649,8 +649,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "NALL", "NALL_CELL", "NALL_GEX", - "NOPAGER", - "NOPRETTY", "PLAIN", "PRINT_CPU", "PRINT_CPU_INFO", diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index fa8f86bc3..5a5c92a73 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -33,10 +33,9 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "PROTO", "REF", ]; - const ALLOWED_ARGS: [&str; 17] = [ + const ALLOWED_ARGS: [&str; 16] = [ "BCR", "META", - "NOPRETTY", "PROTO_METADATA", "TCR", "TCRGD", @@ -104,7 +103,6 @@ pub fn main_enclone_setup_ranger(args: Vec) -> anyhow::Result Date: Sat, 6 Apr 2024 20:57:21 -0700 Subject: [PATCH 06/52] Remove pager function from enclone_ranger. --- Cargo.lock | 11 ----------- enclone/Cargo.toml | 3 --- enclone/src/misc1.rs | 40 ---------------------------------------- 3 files changed, 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5eee85578..33ef4cd76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,7 +405,6 @@ dependencies = [ "hdf5", "io_utils", "itertools", - "pager", "petgraph", "qd", "rayon", @@ -1202,16 +1201,6 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "pager" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2599211a5c97fbbb1061d3dc751fa15f404927e4846e07c643287d6d1f462880" -dependencies = [ - "errno", - "libc", -] - [[package]] name = "parking_lot" version = "0.12.1" diff --git a/enclone/Cargo.toml b/enclone/Cargo.toml index 6764478dd..62586ba2f 100644 --- a/enclone/Cargo.toml +++ b/enclone/Cargo.toml @@ -38,9 +38,6 @@ string_utils = { path = "../string_utils" } vdj_ann = { path = "../vdj_ann" } vector_utils = { path = "../vector_utils" } -[target.'cfg(not(windows))'.dependencies] -pager = "0.16" - [target.'cfg(not(windows))'.dependencies.hdf5] git = "https://github.com/10XGenomics/hdf5-rust.git" branch = "conda_nov2021" diff --git a/enclone/src/misc1.rs b/enclone/src/misc1.rs index bfe0c586a..c66ef1a9c 100644 --- a/enclone/src/misc1.rs +++ b/enclone/src/misc1.rs @@ -9,8 +9,6 @@ use enclone_core::{ }; use equiv::EquivRel; use itertools::Itertools; -#[cfg(not(target_os = "windows"))] -use pager::Pager; use std::time::Instant; use string_utils::stringme; @@ -20,44 +18,6 @@ use vector_utils::{ // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// This section contains a function that supports paging. It does not work under Windows, and -// we describe here all the *known* problems with getting enclone to work under Windows. -// 1. It does not compile for us. When we tried, there was a problem with libhdf-5. -// 2. Paging is turned off, because the pager crate doesn't compile under Windows, and porting -// it to Windows appears nontrivial. -// 3. ANSI escape characters are not handled correctly, at least by default. -// In addition, we have some concerns about what it would mean to properly test enclone on Windows, -// given that some users might have older OS installs, and support for ANSI escape characters -// appears to have been changed in 2018. This is not made easier by the Windows Subsystem for -// Linux. - -#[cfg(not(target_os = "windows"))] -pub fn setup_pager(pager: bool) { - // If the output is going to a terminal, set up paging so that output is in effect piped to - // "less -R -F -X -K". - // - // ∙ The option -R is used to render ANSI escape characters correctly. We do not use - // -r instead because if you navigate backwards in less -r, stuff gets screwed up, - // which is consistent with the scary stuff in the man page for less at -r. However -R will - // not display all unicode characters correctly, so those have to be picked carefully, - // by empirically testing that e.g. "echo ◼ | less -R -F -X" renders correctly. - // - // ∙ The -F option makes less exit immediately if all the output can be seen in one screen. - // - // ∙ The -X option is needed because we found that in full screen mode on OSX Catalina, output - // was sent to the alternate screen, and hence it appeared that one got no output at all - // from enclone. This is really bad, so do not turn off this option! - - if pager { - Pager::with_pager("less -R -F -X -K").setup(); - } -} - -#[cfg(target_os = "windows")] -pub fn setup_pager(_pager: bool) {} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - // Lookup for heavy chain reuse (special purpose experimental option). // This is interesting but not likely to yield interesting examples of heavy chain reuse // because biologically it doesn't make sense that one would have both H-L1 and H-L2 expanded. From a25e9acd995d9226b846470c0d59c2f7200bba16 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 21:05:39 -0700 Subject: [PATCH 07/52] Restore NOPAGER proc. --- enclone_args/src/proc_args.rs | 1 + enclone_core/src/defs.rs | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 3040929c0..fb432142a 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -649,6 +649,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "NALL", "NALL_CELL", "NALL_GEX", + "NOPAGER", "PLAIN", "PRINT_CPU", "PRINT_CPU_INFO", diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 91466de86..835e6b92b 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -100,8 +100,6 @@ pub struct CellrangerOpt { pub cellranger: bool, /// Path to donor reference file. pub dref_file: String, - /// If true, do not use output paging. - pub nopager: bool, } impl CellrangerOpt { @@ -118,9 +116,6 @@ impl CellrangerOpt { "CELLRANGER" => { cr_opts.cellranger = true; } - "NOPAGER" => { - cr_opts.nopager = true; - } "DONOR_REF_FILE" => { cr_opts.dref_file = get_val(); } From 9fe7093bc190baf66895ae2903fbdb038d7c87a6 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 21:08:12 -0700 Subject: [PATCH 08/52] Remove NOPAGER from required args. --- enclone_ranger/src/main_enclone.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 5a5c92a73..1f770f965 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -27,7 +27,6 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "CELLRANGER", // done "DONOR_REF_FILE", // done "MAX_CORES", - "NOPAGER", "NOPRINT", "PRE", "PROTO", From 1d7ccbcc9ca364fbd9ac927c30c15fcf8939a705 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 21:19:20 -0700 Subject: [PATCH 09/52] Extract the PROTO arg. --- enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 12 ++++++++---- enclone_process/src/loupe.rs | 6 +++--- enclone_process/src/process_clonotypes.rs | 2 +- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index fb432142a..b5d607c16 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -603,7 +603,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("BINARY", &mut ctl.gen_opt.binary), ("FATE_FILE", &mut ctl.gen_opt.fate_file), ("HONEY_OUT", &mut ctl.plot_opt.honey_out), - ("PROTO", &mut ctl.gen_opt.proto), ("SUBSET_JSON", &mut ctl.gen_opt.subset_json), ]; diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 835e6b92b..19aa06afb 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -98,8 +98,10 @@ impl OriginInfo { pub struct CellrangerOpt { /// True if enclone is being called from Cellranger. pub cellranger: bool, - /// Path to donor reference file. + /// Path to donor reference output file. pub dref_file: String, + /// Path to protobuf output file. + pub proto: String, } impl CellrangerOpt { @@ -111,13 +113,16 @@ impl CellrangerOpt { for arg in args { let mut pieces = arg.split('='); let arg_name = pieces.next().unwrap(); - let get_val = || pieces.exactly_one().expect("FIXME").to_string(); + let mut get_rest = || pieces.join("="); match arg_name { "CELLRANGER" => { cr_opts.cellranger = true; } "DONOR_REF_FILE" => { - cr_opts.dref_file = get_val(); + cr_opts.dref_file = get_rest(); + } + "PROTO" => { + cr_opts.proto = get_rest(); } _ => { // FIXME @@ -168,7 +173,6 @@ pub struct GeneralOpt { pub complete: bool, pub exact: Option, pub binary: String, - pub proto: String, pub fate_file: String, // Optional path to a json file containing metadata pub proto_metadata: Option, diff --git a/enclone_process/src/loupe.rs b/enclone_process/src/loupe.rs index 896e3dc7a..e94b265b2 100644 --- a/enclone_process/src/loupe.rs +++ b/enclone_process/src/loupe.rs @@ -430,7 +430,7 @@ pub fn loupe_out( refdata: &RefData, dref: &[DonorReferenceItem], ) { - if !ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty() { + if !ctl.gen_opt.binary.is_empty() || !ctl.cr_opt.proto.is_empty() { let mut uref = Vec::new(); for i in 0..refdata.refs.len() { uref.push(UniversalReferenceItem { @@ -468,8 +468,8 @@ pub fn loupe_out( if !ctl.gen_opt.binary.is_empty() { write_obj(&enclone_outputs, &ctl.gen_opt.binary); } - if !ctl.gen_opt.proto.is_empty() { - write_proto(enclone_outputs, &ctl.gen_opt.proto).unwrap(); + if !ctl.cr_opt.proto.is_empty() { + write_proto(enclone_outputs, &ctl.cr_opt.proto).unwrap(); } } } diff --git a/enclone_process/src/process_clonotypes.rs b/enclone_process/src/process_clonotypes.rs index c1f6badb2..54938c29c 100644 --- a/enclone_process/src/process_clonotypes.rs +++ b/enclone_process/src/process_clonotypes.rs @@ -185,7 +185,7 @@ pub fn process_clonotypes( // Generate Loupe data. - let loupe_clonotype = (!ctl.gen_opt.binary.is_empty() || !ctl.gen_opt.proto.is_empty()) + let loupe_clonotype = (!ctl.gen_opt.binary.is_empty() || !ctl.cr_opt.proto.is_empty()) .then(|| make_loupe_clonotype(exact_clonotypes, &exacts, &rsi, refdata, dref, ctl)); // Let n be the total number of cells in this pass. From 53c577a7334c37b3db8030b71becc7143782be29 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Sat, 6 Apr 2024 21:23:32 -0700 Subject: [PATCH 10/52] Extract the PROTO_METADATA arg. --- enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 7 +++++-- enclone_process/src/loupe.rs | 10 ++++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index b5d607c16..25a0afa6f 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -623,7 +623,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String &mut ctl.gen_opt.clonotype_group_names, ), ("HONEY_IN", &mut ctl.plot_opt.honey_in), - ("PROTO_METADATA", &mut ctl.gen_opt.proto_metadata), ]; // Define arguments that set something to a string that is an input file name, not represented diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 19aa06afb..0e82286f8 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -102,6 +102,8 @@ pub struct CellrangerOpt { pub dref_file: String, /// Path to protobuf output file. pub proto: String, + /// Optional path to a json file containing metadata. + pub proto_metadata: String, } impl CellrangerOpt { @@ -124,6 +126,9 @@ impl CellrangerOpt { "PROTO" => { cr_opts.proto = get_rest(); } + "PROTO_METADATA" => { + cr_opts.proto_metadata = get_rest(); + } _ => { // FIXME unused_args.push(arg.clone()); @@ -174,8 +179,6 @@ pub struct GeneralOpt { pub exact: Option, pub binary: String, pub fate_file: String, - // Optional path to a json file containing metadata - pub proto_metadata: Option, pub h5_pre: bool, pub accept_reuse: bool, pub descrip: bool, diff --git a/enclone_process/src/loupe.rs b/enclone_process/src/loupe.rs index e94b265b2..fdbe1efef 100644 --- a/enclone_process/src/loupe.rs +++ b/enclone_process/src/loupe.rs @@ -447,13 +447,15 @@ pub fn loupe_out( nt_sequence: refdata.refs[i].to_ascii_vec(), }); } - let metadata = match &ctl.gen_opt.proto_metadata { - Some(fname) => serde_json::from_reader( + let metadata = if !ctl.cr_opt.proto_metadata.is_empty() { + let fname = &ctl.cr_opt.proto_metadata; + serde_json::from_reader( std::fs::File::open(fname) .unwrap_or_else(|_| panic!("Error while reading {fname}")), ) - .unwrap_or_else(|_| panic!("Unable to deserialize Metadata from {fname}")), - None => Metadata::default(), + .unwrap_or_else(|_| panic!("Unable to deserialize Metadata from {fname}")) + } else { + Metadata::default() }; let enclone_outputs = EncloneOutputs { version: PROTO_VERSION.into(), From 935beeec22691d444617751251accc463e0f5be9 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 18:55:48 -0400 Subject: [PATCH 11/52] Mark proto/etc as done. --- enclone_ranger/src/main_enclone.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 1f770f965..2d36325e1 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -29,13 +29,13 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "MAX_CORES", "NOPRINT", "PRE", - "PROTO", + "PROTO", // done "REF", ]; const ALLOWED_ARGS: [&str; 16] = [ "BCR", "META", - "PROTO_METADATA", + "PROTO_METADATA", // done "TCR", "TCRGD", "GAMMA_DELTA", From 0f7d07c1aaa7009f9f22f4b7705bd704bfe7dcfd Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 19:15:15 -0400 Subject: [PATCH 12/52] Move NUMI into cr_opts. --- enclone_args/src/proc_args.rs | 28 +++++++++++++--------------- enclone_args/src/proc_args_post.rs | 2 +- enclone_core/src/defs.rs | 23 +++++++++++++++++++++-- enclone_stuff/src/filter_umi.rs | 2 +- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 25a0afa6f..5d48eb484 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -130,7 +130,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ctl.clono_filt_opt_def.doublet = true; ctl.clono_filt_opt_def.bc_dup = true; ctl.clono_filt_opt.max_datasets = 1000000000; - ctl.clono_filt_opt_def.umi_filt = true; ctl.clono_filt_opt_def.umi_ratio_filt = true; ctl.clono_filt_opt.max_exacts = 1_000_000_000; @@ -335,10 +334,11 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String } // Preprocess NALL and NALL_GEX. - - for i in 1..args.len() { - if args[i] == *"NALL" || args[i] == "NALL_CELL" || args[i] == "NALL_GEX" { - let f = [ + // FIXME: these should be implmeneted as a direct action on opt rather than + // pushing additional command line args. + for arg in &args[1..].to_vec() { + if arg == "NALL" || arg == "NALL_CELL" || arg == "NALL_GEX" { + for arg_to_append in [ "NCELL", "NGEX", "NCROSS", @@ -356,18 +356,17 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "MIX_DONORS", "NIMPROPER", "NSIG", - ]; - for j in 0..f.len() { - if f[j] == "NCELL" { - if args[i] != "NALL_CELL" { - args.push(f[j].to_string()); + ] { + if arg_to_append == "NCELL" { + if arg != "NALL_CELL" { + args.push(arg_to_append.to_string()); } - } else if f[j] == "NGEX" { - if args[i] != "NALL_GEX" { - args.push(f[j].to_string()); + } else if arg_to_append == "NGEX" { + if arg != "NALL_GEX" { + args.push(arg_to_append.to_string()); } } else { - args.push(f[j].to_string()); + args.push(arg_to_append.to_string()); } } break; @@ -516,7 +515,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), ("NSIG", &mut ctl.clono_filt_opt_def.signature), ("NSILENT", &mut ctl.silent), - ("NUMI", &mut ctl.clono_filt_opt_def.umi_filt), ("NUMI_RATIO", &mut ctl.clono_filt_opt_def.umi_ratio_filt), ("NWEAK_CHAINS", &mut ctl.clono_filt_opt_def.weak_chains), ("NWEAK_ONESIES", &mut ctl.clono_filt_opt_def.weak_onesies), diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 2385858ef..2bebf7ca9 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -473,7 +473,7 @@ pub fn proc_args_post( "\nIf the value of PHYLIP_DNA is not stdout, it must end in .tar.\n".to_string(), ); } - if ctl.clono_filt_opt_def.umi_filt && ctl.clono_filt_opt_def.umi_filt_mark { + if ctl.cr_opt.umi_filt && ctl.clono_filt_opt_def.umi_filt_mark { return Err( "\nIf you use UMI_FILT_MARK, you should also use NUMI, to turn off \ the filter,\nas otherwise nothing will be marked.\n" diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 0e82286f8..2524568d3 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -94,7 +94,7 @@ impl OriginInfo { } /// The subset of configuration options used by Cellranger. -#[derive(Default, PartialEq)] +#[derive(PartialEq)] pub struct CellrangerOpt { /// True if enclone is being called from Cellranger. pub cellranger: bool, @@ -104,6 +104,23 @@ pub struct CellrangerOpt { pub proto: String, /// Optional path to a json file containing metadata. pub proto_metadata: String, + + // Clonotype filtering options. + // TOOD: split these back out into a separate struct? + /// umi count filter + pub umi_filt: bool, +} + +impl Default for CellrangerOpt { + fn default() -> Self { + Self { + cellranger: Default::default(), + dref_file: Default::default(), + proto: Default::default(), + proto_metadata: Default::default(), + umi_filt: true, + } + } } impl CellrangerOpt { @@ -129,6 +146,9 @@ impl CellrangerOpt { "PROTO_METADATA" => { cr_opts.proto_metadata = get_rest(); } + "NUMI" => { + cr_opts.umi_filt = false; + } _ => { // FIXME unused_args.push(arg.clone()); @@ -433,7 +453,6 @@ pub struct ClonoFiltOptDefault { pub weak_onesies: bool, // filter weak onesies pub doublet: bool, // filter putative doublets pub fcell: Vec, // constraints from FCELL - pub umi_filt: bool, // umi count filter pub umi_filt_mark: bool, // umi count filter (but only mark) pub umi_ratio_filt: bool, // umi ratio filter pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) diff --git a/enclone_stuff/src/filter_umi.rs b/enclone_stuff/src/filter_umi.rs index ab2e5ac0e..2a2febc59 100644 --- a/enclone_stuff/src/filter_umi.rs +++ b/enclone_stuff/src/filter_umi.rs @@ -192,7 +192,7 @@ pub fn filter_umi( .insert(ex.clones[i][0].barcode.clone(), BarcodeFate::Umi); } } - if ctl.clono_filt_opt_def.umi_filt { + if ctl.cr_opt.umi_filt { erase_if(&mut ex.clones, &to_delete); } } From 96a71e1c0662f063a131beda2f742219492d28b8 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 19:19:54 -0400 Subject: [PATCH 13/52] Move NUMI_RATIO into cr_opts. --- enclone_args/src/proc_args.rs | 2 -- enclone_args/src/proc_args_post.rs | 2 +- enclone_core/src/defs.rs | 4 +++- enclone_stuff/src/filter_umi.rs | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 5d48eb484..2f09091d8 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -130,7 +130,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ctl.clono_filt_opt_def.doublet = true; ctl.clono_filt_opt_def.bc_dup = true; ctl.clono_filt_opt.max_datasets = 1000000000; - ctl.clono_filt_opt_def.umi_ratio_filt = true; ctl.clono_filt_opt.max_exacts = 1_000_000_000; ctl.clono_print_opt.amino = vec![ @@ -515,7 +514,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), ("NSIG", &mut ctl.clono_filt_opt_def.signature), ("NSILENT", &mut ctl.silent), - ("NUMI_RATIO", &mut ctl.clono_filt_opt_def.umi_ratio_filt), ("NWEAK_CHAINS", &mut ctl.clono_filt_opt_def.weak_chains), ("NWEAK_ONESIES", &mut ctl.clono_filt_opt_def.weak_onesies), ("PRINT_FAILED_JOINS", &mut ctl.join_print_opt.quiet), diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 2bebf7ca9..6a1bd70ad 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -480,7 +480,7 @@ pub fn proc_args_post( .to_string(), ); } - if ctl.clono_filt_opt_def.umi_ratio_filt && ctl.clono_filt_opt_def.umi_ratio_filt_mark { + if ctl.cr_opt.umi_ratio_filt && ctl.clono_filt_opt_def.umi_ratio_filt_mark { return Err( "\nIf you use UMI_RATIO_FILT_MARK, you should also use NUMI_RATIO, to turn off \ the filter,\nas otherwise nothing will be marked.\n" diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 2524568d3..bb7d2fe78 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -109,6 +109,8 @@ pub struct CellrangerOpt { // TOOD: split these back out into a separate struct? /// umi count filter pub umi_filt: bool, + /// umi ratio filter + pub umi_ratio_filt: bool, } impl Default for CellrangerOpt { @@ -119,6 +121,7 @@ impl Default for CellrangerOpt { proto: Default::default(), proto_metadata: Default::default(), umi_filt: true, + umi_ratio_filt: true, } } } @@ -454,7 +457,6 @@ pub struct ClonoFiltOptDefault { pub doublet: bool, // filter putative doublets pub fcell: Vec, // constraints from FCELL pub umi_filt_mark: bool, // umi count filter (but only mark) - pub umi_ratio_filt: bool, // umi ratio filter pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) pub weak_chains: bool, // filter weak chains from clonotypes pub whitef: bool, // only show clonotypes exhibiting whitelist contamination diff --git a/enclone_stuff/src/filter_umi.rs b/enclone_stuff/src/filter_umi.rs index 2a2febc59..1c4ac43d6 100644 --- a/enclone_stuff/src/filter_umi.rs +++ b/enclone_stuff/src/filter_umi.rs @@ -299,7 +299,7 @@ pub fn filter_umi( .insert(ex.clones[i][0].barcode.clone(), BarcodeFate::UmiRatio); } } - if ctl.clono_filt_opt_def.umi_ratio_filt { + if ctl.cr_opt.umi_ratio_filt { erase_if(&mut ex.clones, &to_delete[j]); if ex.ncells() == 0 { to_deletex[j] = true; @@ -308,7 +308,7 @@ pub fn filter_umi( } } if pass == 2 { - if ctl.clono_filt_opt_def.umi_ratio_filt { + if ctl.cr_opt.umi_ratio_filt { erase_if(&mut o, &to_deletex); } if !o.is_empty() { From 22824613376e28ce7c251d898ecb0d9c119138bb Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 19:20:20 -0400 Subject: [PATCH 14/52] Move NUMI_RATIO into cr_opts. --- enclone_core/src/defs.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index bb7d2fe78..466d83d68 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -152,6 +152,9 @@ impl CellrangerOpt { "NUMI" => { cr_opts.umi_filt = false; } + "NUMI_RATIO" => { + cr_opts.umi_ratio_filt = false; + } _ => { // FIXME unused_args.push(arg.clone()); From 14e09a4be0319943acae9bbae8494c6d415cf1ad Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 19:25:59 -0400 Subject: [PATCH 15/52] Make a note that NOPRINT is unused in enclone_ranger. --- enclone_ranger/src/main_enclone.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 2d36325e1..5848a4ef8 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -27,7 +27,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "CELLRANGER", // done "DONOR_REF_FILE", // done "MAX_CORES", - "NOPRINT", + "NOPRINT", // now unused in enclone_ranger "PRE", "PROTO", // done "REF", From 76c134541a7f05cf53c52e3a32046b01247c63da Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 19:35:36 -0400 Subject: [PATCH 16/52] Move REF into cr_opts. --- enclone_args/src/proc_args.rs | 1 - enclone_args/src/proc_args2.rs | 2 +- enclone_core/src/defs.rs | 15 +++++++++++++-- enclone_ranger/src/main_enclone.rs | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 2f09091d8..6395fac2a 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -628,7 +628,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("BC_JOINT", &mut ctl.gen_opt.bc_joint), ("EXTERNAL_REF", &mut ctl.gen_opt.external_ref), ("POST_FILTER", &mut ctl.gen_opt.post_filter), - ("REF", &mut ctl.gen_opt.refname), ]; // Define arguments that do nothing (because already parsed), and which have no "= value" part. diff --git a/enclone_args/src/proc_args2.rs b/enclone_args/src/proc_args2.rs index e0c0ecd09..6a92c0ff9 100644 --- a/enclone_args/src/proc_args2.rs +++ b/enclone_args/src/proc_args2.rs @@ -144,7 +144,7 @@ pub fn proc_args_tail(ctl: &mut EncloneControl, args: &[String]) -> Result<(), S if !ctl.clono_print_opt.amino.is_empty() { ctl.clono_print_opt.cvars.insert(0, "amino".to_string()); } - if ctl.gen_opt.mouse && !ctl.gen_opt.refname.is_empty() { + if ctl.gen_opt.mouse && !ctl.cr_opt.refname.is_empty() { return Err( "\nIf you specify REF, please do not also specify MOUSE. It is enough to\n\ set REF to a mouse reference sequence.\n" diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 466d83d68..598248e9e 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -98,6 +98,9 @@ impl OriginInfo { pub struct CellrangerOpt { /// True if enclone is being called from Cellranger. pub cellranger: bool, + /// Path to reference. + pub refname: String, + /// Path to donor reference output file. pub dref_file: String, /// Path to protobuf output file. @@ -117,6 +120,7 @@ impl Default for CellrangerOpt { fn default() -> Self { Self { cellranger: Default::default(), + refname: Default::default(), dref_file: Default::default(), proto: Default::default(), proto_metadata: Default::default(), @@ -135,7 +139,11 @@ impl CellrangerOpt { for arg in args { let mut pieces = arg.split('='); let arg_name = pieces.next().unwrap(); - let mut get_rest = || pieces.join("="); + let mut get_rest = || { + let result = pieces.join("="); + assert!(!result.is_empty(), "no value provided for {arg_name}"); + result + }; match arg_name { "CELLRANGER" => { cr_opts.cellranger = true; @@ -169,6 +177,10 @@ impl CellrangerOpt { if !self.dref_file.is_empty() { // TODO: test writability } + + if !self.refname.is_empty() { + // TODO: check readability + } Ok(()) } } @@ -212,7 +224,6 @@ pub struct GeneralOpt { pub extc: HashMap<(String, String), String>, pub extn: HashMap, pub mouse: bool, - pub refname: String, pub noprint: bool, pub noprintx: bool, pub required_fps: Option, diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 5848a4ef8..c71720af0 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -30,7 +30,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "NOPRINT", // now unused in enclone_ranger "PRE", "PROTO", // done - "REF", + "REF", // done ]; const ALLOWED_ARGS: [&str; 16] = [ "BCR", @@ -114,7 +114,7 @@ pub fn main_enclone_setup_ranger(args: Vec) -> anyhow::Result Date: Tue, 16 Apr 2024 19:58:31 -0400 Subject: [PATCH 17/52] Move PRE= into cr_opts. --- enclone_args/src/proc_args3.rs | 10 +++++----- enclone_args/src/proc_args_post.rs | 4 ++-- enclone_core/src/defs.rs | 11 ++++++++++- enclone_ranger/src/main_enclone.rs | 10 +--------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/enclone_args/src/proc_args3.rs b/enclone_args/src/proc_args3.rs index 530d24d3c..4d5169095 100644 --- a/enclone_args/src/proc_args3.rs +++ b/enclone_args/src/proc_args3.rs @@ -68,14 +68,14 @@ fn expand_analysis_sets(x: &str) -> Result { // Functions to find the path to data. pub fn get_path_fail(p: &str, ctl: &EncloneControl, source: &str) -> Result { - for x in &ctl.gen_opt.pre { + for x in &ctl.cr_opt.pre { let pp = format!("{x}/{p}"); if path_exists(&pp) { return Ok(pp); } } if !path_exists(p) { - if ctl.gen_opt.pre.is_empty() { + if ctl.cr_opt.pre.is_empty() { let path = std::env::current_dir().unwrap(); return Err(format!( "\nIn directory {}, unable to find the path {}. This came from the {} argument.\n", @@ -86,7 +86,7 @@ pub fn get_path_fail(p: &str, ctl: &EncloneControl, source: &str) -> Result Result Result String { *ok = false; - for x in &ctl.gen_opt.pre { + for x in &ctl.cr_opt.pre { let mut pp = format!("{x}/{p}"); if pp.starts_with('~') { tilde_expand_me(&mut pp); diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 6a1bd70ad..1b10d8e41 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -497,8 +497,8 @@ pub fn proc_args_post( let f = get_path_fail(meta, ctl, "META")?; if f.contains('/') { let d = f.rev_before("/").to_string(); - if !ctl.gen_opt.pre.contains(&d) { - ctl.gen_opt.pre.push(d); + if !ctl.cr_opt.pre.contains(&d) { + ctl.cr_opt.pre.push(d); } } v.push(f); diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 598248e9e..7c3f2b825 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -97,7 +97,10 @@ impl OriginInfo { #[derive(PartialEq)] pub struct CellrangerOpt { /// True if enclone is being called from Cellranger. + // FIXME: always true when called from CR... we should figure out how to + // eliminate this. pub cellranger: bool, + pub pre: Vec, /// Path to reference. pub refname: String, @@ -120,6 +123,7 @@ impl Default for CellrangerOpt { fn default() -> Self { Self { cellranger: Default::default(), + pre: Default::default(), refname: Default::default(), dref_file: Default::default(), proto: Default::default(), @@ -148,6 +152,12 @@ impl CellrangerOpt { "CELLRANGER" => { cr_opts.cellranger = true; } + "PRE" => { + cr_opts.pre = get_rest().split(',').map(str::to_string).collect(); + } + "REF" => { + cr_opts.refname = get_rest(); + } "DONOR_REF_FILE" => { cr_opts.dref_file = get_rest(); } @@ -189,7 +199,6 @@ impl CellrangerOpt { #[derive(Default, PartialEq)] pub struct GeneralOpt { - pub pre: Vec, pub indels: bool, pub reannotate: bool, pub heavy_chain_reuse: bool, diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index c71720af0..a8a14ede9 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -23,7 +23,7 @@ use string_utils::TextUtils; use vdj_ann::refx; pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { - const REQUIRED_ARGS: [&str; 8] = [ + const REQUIRED_ARGS: [&str; 7] = [ "CELLRANGER", // done "DONOR_REF_FILE", // done "MAX_CORES", @@ -91,14 +91,6 @@ pub fn main_enclone_setup_ranger(args: Vec) -> anyhow::Result Date: Tue, 16 Apr 2024 20:06:53 -0400 Subject: [PATCH 19/52] Make a note about refactoring how we handle MAX_CORES. --- enclone_ranger/src/main_enclone.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index d889d1545..7247b6504 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -26,11 +26,11 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { const REQUIRED_ARGS: [&str; 7] = [ "CELLRANGER", // done "DONOR_REF_FILE", // done - "MAX_CORES", - "NOPRINT", // now unused in enclone_ranger - "PRE", // done - "PROTO", // done - "REF", // done + "MAX_CORES", // FIXME: move this behavior into enclone and set thread count in CR when calling + "NOPRINT", // now unused in enclone_ranger + "PRE", // done + "PROTO", // done + "REF", // done ]; const ALLOWED_ARGS: [&str; 16] = [ "BCR", From b4603626905f395e0bfa2dff2e369e7d2dcd112d Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:07:17 -0400 Subject: [PATCH 20/52] Mark NUMI and NUMI_RATIO as complete. --- enclone_ranger/src/main_enclone.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 7247b6504..a856389bb 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -40,8 +40,8 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "TCRGD", "GAMMA_DELTA", "FATE_FILE", - "NUMI", - "NUMI_RATIO", + "NUMI", // done + "NUMI_RATIO", // done "NGRAPH_FILTER", "NWEAK_CHAINS", "NFOURSIE_KILL", From 43da902bb1b5f4b4c9e3dd2705e8904d8e10bd75 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:16:59 -0400 Subject: [PATCH 21/52] Move FATE_FILE into cr_opts. --- enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 8 ++++++-- enclone_process/src/process_clonotypes.rs | 4 ++-- enclone_ranger/src/main_enclone.rs | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 6395fac2a..b826d672a 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -597,7 +597,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let set_string_writeable = [ ("BINARY", &mut ctl.gen_opt.binary), - ("FATE_FILE", &mut ctl.gen_opt.fate_file), ("HONEY_OUT", &mut ctl.plot_opt.honey_out), ("SUBSET_JSON", &mut ctl.gen_opt.subset_json), ]; diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 7c3f2b825..416f3e3eb 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -110,6 +110,8 @@ pub struct CellrangerOpt { pub proto: String, /// Optional path to a json file containing metadata. pub proto_metadata: String, + /// Optional path to write out barcode fate. + pub fate_file: String, // Clonotype filtering options. // TOOD: split these back out into a separate struct? @@ -128,6 +130,7 @@ impl Default for CellrangerOpt { dref_file: Default::default(), proto: Default::default(), proto_metadata: Default::default(), + fate_file: Default::default(), umi_filt: true, umi_ratio_filt: true, } @@ -187,7 +190,9 @@ impl CellrangerOpt { if !self.dref_file.is_empty() { // TODO: test writability } - + if !self.fate_file.is_empty() { + // TODO: test writability + } if !self.refname.is_empty() { // TODO: check readability } @@ -225,7 +230,6 @@ pub struct GeneralOpt { pub complete: bool, pub exact: Option, pub binary: String, - pub fate_file: String, pub h5_pre: bool, pub accept_reuse: bool, pub descrip: bool, diff --git a/enclone_process/src/process_clonotypes.rs b/enclone_process/src/process_clonotypes.rs index 54938c29c..5f3427ce9 100644 --- a/enclone_process/src/process_clonotypes.rs +++ b/enclone_process/src/process_clonotypes.rs @@ -227,9 +227,9 @@ pub fn process_clonotypes( results.sort_by_key(|(num_cells, _, _)| Reverse(*num_cells)); // Write out the fate of each filtered barcode. - if !ctl.gen_opt.fate_file.is_empty() { + if !ctl.cr_opt.fate_file.is_empty() { let mut wtr = BufWriter::new( - File::create(&ctl.gen_opt.fate_file).expect("Unable to open FATE_FILE for writing"), + File::create(&ctl.cr_opt.fate_file).expect("Unable to open FATE_FILE for writing"), ); serde_json::to_writer_pretty(&mut wtr, &fate).map_err(|e| e.to_string())?; } diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index a856389bb..bd8176ece 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -39,7 +39,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "TCR", "TCRGD", "GAMMA_DELTA", - "FATE_FILE", + "FATE_FILE", // done "NUMI", // done "NUMI_RATIO", // done "NGRAPH_FILTER", From 698019f68d7a4ef825c7e1c2a0a02776dd5b4c91 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:21:29 -0400 Subject: [PATCH 22/52] Move GAMMA_DELTA into cr_opts. --- enclone_args/src/proc_args.rs | 5 ++--- enclone_args/src/read_json.rs | 4 ++-- enclone_core/src/defs.rs | 11 ++++++++++- enclone_ranger/src/main_enclone.rs | 8 ++++---- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index b826d672a..a7998904a 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -412,7 +412,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("FOLD_HEADERS", &mut ctl.gen_opt.fold_headers), ("FORCE", &mut ctl.force), ("FULL_SEQC", &mut ctl.clono_print_opt.full_seqc), - ("GAMMA_DELTA", &mut ctl.gen_opt.gamma_delta), ("GRAPH", &mut ctl.gen_opt.graph), ( "GROUP_CDR3H_LEN_VAR", @@ -919,12 +918,12 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String // Do residual argument processing. - if ctl.gen_opt.gamma_delta && !have_tcrgd || !ctl.gen_opt.gamma_delta && have_tcrgd { + if ctl.cr_opt.gamma_delta && !have_tcrgd || !ctl.cr_opt.gamma_delta && have_tcrgd { return Err( "\n. GAMMA_DELTA flag has to be enabled for using TCRGD= and vice versa.\n".to_string(), ); } - if ctl.gen_opt.gamma_delta && (have_bcr || have_gex || have_meta || have_tcr) { + if ctl.cr_opt.gamma_delta && (have_bcr || have_gex || have_meta || have_tcr) { return Err( "\n. Unsupported input type in GAMMA_DELTA mode. Only TCRGD= input is supported.\n" .to_string(), diff --git a/enclone_args/src/read_json.rs b/enclone_args/src/read_json.rs index 7e43c25fe..be58717a3 100644 --- a/enclone_args/src/read_json.rs +++ b/enclone_args/src/read_json.rs @@ -201,7 +201,7 @@ fn process_json_annotation( chain_type = refdata.name[t][0..3].to_string(); if chain_type == *"IGH" || chain_type == *"TRB" - || (chain_type == *"TRD" && ctl.gen_opt.gamma_delta) + || (chain_type == *"TRD" && ctl.cr_opt.gamma_delta) { left = true; } @@ -291,7 +291,7 @@ fn process_json_annotation( cdr3_start -= tig_start as usize; if chain == VdjChain::IGH || chain == VdjChain::TRB - || (chain == VdjChain::TRD && ctl.gen_opt.gamma_delta) + || (chain == VdjChain::TRD && ctl.cr_opt.gamma_delta) { left = true; } diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 416f3e3eb..81a59951c 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -113,6 +113,9 @@ pub struct CellrangerOpt { /// Optional path to write out barcode fate. pub fate_file: String, + /// True if running in gamma-delta mode. + pub gamma_delta: bool, + // Clonotype filtering options. // TOOD: split these back out into a separate struct? /// umi count filter @@ -131,6 +134,7 @@ impl Default for CellrangerOpt { proto: Default::default(), proto_metadata: Default::default(), fate_file: Default::default(), + gamma_delta: Default::default(), umi_filt: true, umi_ratio_filt: true, } @@ -170,6 +174,12 @@ impl CellrangerOpt { "PROTO_METADATA" => { cr_opts.proto_metadata = get_rest(); } + "FATE_FILE" => { + cr_opts.fate_file = get_rest(); + } + "GAMMA_DELTA" => { + cr_opts.gamma_delta = true; + } "NUMI" => { cr_opts.umi_filt = false; } @@ -324,7 +334,6 @@ pub struct GeneralOpt { pub var_def: Vec<(String, String, Node, String)>, // {(variable, value, compiled value, expr)} pub nospaces: bool, pub subsample: f64, - pub gamma_delta: bool, pub pre_eval: bool, pub pre_eval_show: bool, pub external_ref: String, diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index bd8176ece..b5dc36550 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -38,10 +38,10 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "PROTO_METADATA", // done "TCR", "TCRGD", - "GAMMA_DELTA", - "FATE_FILE", // done - "NUMI", // done - "NUMI_RATIO", // done + "GAMMA_DELTA", // done + "FATE_FILE", // done + "NUMI", // done + "NUMI_RATIO", // done "NGRAPH_FILTER", "NWEAK_CHAINS", "NFOURSIE_KILL", From 4f9b0639ab7a463654d6de6892c3e519a2a67792 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:23:53 -0400 Subject: [PATCH 23/52] Move NGRAPH_FILTER into cr_opts. --- enclone/src/graph_filter.rs | 2 +- enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 7 ++++++- enclone_ranger/src/main_enclone.rs | 10 +++++----- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/enclone/src/graph_filter.rs b/enclone/src/graph_filter.rs index f82f8bdd4..474740c65 100644 --- a/enclone/src/graph_filter.rs +++ b/enclone/src/graph_filter.rs @@ -379,7 +379,7 @@ pub fn graph_filter( .insert(tig_bc[i][0].barcode.clone(), BarcodeFate::GraphFilter); } } - if !ctl.gen_opt.ngraph_filter { + if !ctl.cr_opt.ngraph_filter { erase_if(tig_bc, &to_delete); } if graph { diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index a7998904a..29ee1eb3e 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -439,7 +439,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NEWICK", &mut ctl.gen_opt.newick), ("NGEX", &mut ctl.clono_filt_opt_def.ngex), ("NOGRAY", &mut ctl.nogray), - ("NGRAPH_FILTER", &mut ctl.gen_opt.ngraph_filter), ("NGROUP", &mut ctl.clono_group_opt.ngroup), ("NIMPROPER", &mut ctl.merge_all_impropers), ("NMAX", &mut ctl.clono_filt_opt_def.nmax), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 81a59951c..19d165e86 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -116,6 +116,8 @@ pub struct CellrangerOpt { /// True if running in gamma-delta mode. pub gamma_delta: bool, + pub ngraph_filter: bool, + // Clonotype filtering options. // TOOD: split these back out into a separate struct? /// umi count filter @@ -135,6 +137,7 @@ impl Default for CellrangerOpt { proto_metadata: Default::default(), fate_file: Default::default(), gamma_delta: Default::default(), + ngraph_filter: Default::default(), umi_filt: true, umi_ratio_filt: true, } @@ -180,6 +183,9 @@ impl CellrangerOpt { "GAMMA_DELTA" => { cr_opts.gamma_delta = true; } + "NGRAPH_FILTER" => { + cr_opts.ngraph_filter = true; + } "NUMI" => { cr_opts.umi_filt = false; } @@ -217,7 +223,6 @@ pub struct GeneralOpt { pub indels: bool, pub reannotate: bool, pub heavy_chain_reuse: bool, - pub ngraph_filter: bool, pub graph: bool, pub utr_con: bool, pub con_con: bool, diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index b5dc36550..f684c500c 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -38,11 +38,11 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "PROTO_METADATA", // done "TCR", "TCRGD", - "GAMMA_DELTA", // done - "FATE_FILE", // done - "NUMI", // done - "NUMI_RATIO", // done - "NGRAPH_FILTER", + "GAMMA_DELTA", // done + "FATE_FILE", // done + "NUMI", // done + "NUMI_RATIO", // done + "NGRAPH_FILTER", // done "NWEAK_CHAINS", "NFOURSIE_KILL", "NDOUBLET", From 6e623dc3f22378d0adc249aec62ff19802e1f08b Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:26:47 -0400 Subject: [PATCH 24/52] Move NWEAK_CHAINS into cr_opts. --- enclone_args/src/proc_args.rs | 2 -- enclone_core/src/defs.rs | 7 ++++++- enclone_ranger/src/main_enclone.rs | 2 +- enclone_stuff/src/weak_chains.rs | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 29ee1eb3e..a90b46b53 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -124,7 +124,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ctl.clono_filt_opt.max_chains = 1000000; ctl.clono_filt_opt.qual_filter = true; ctl.clono_filt_opt_def.signature = true; - ctl.clono_filt_opt_def.weak_chains = true; ctl.clono_filt_opt_def.weak_onesies = true; ctl.clono_filt_opt_def.weak_foursies = true; ctl.clono_filt_opt_def.doublet = true; @@ -512,7 +511,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), ("NSIG", &mut ctl.clono_filt_opt_def.signature), ("NSILENT", &mut ctl.silent), - ("NWEAK_CHAINS", &mut ctl.clono_filt_opt_def.weak_chains), ("NWEAK_ONESIES", &mut ctl.clono_filt_opt_def.weak_onesies), ("PRINT_FAILED_JOINS", &mut ctl.join_print_opt.quiet), ]; diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 19d165e86..f5221cc99 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -124,6 +124,8 @@ pub struct CellrangerOpt { pub umi_filt: bool, /// umi ratio filter pub umi_ratio_filt: bool, + /// filter weak chains from clonotypes + pub weak_chains: bool, } impl Default for CellrangerOpt { @@ -140,6 +142,7 @@ impl Default for CellrangerOpt { ngraph_filter: Default::default(), umi_filt: true, umi_ratio_filt: true, + weak_chains: true, } } } @@ -192,6 +195,9 @@ impl CellrangerOpt { "NUMI_RATIO" => { cr_opts.umi_ratio_filt = false; } + "NWEAK_CHAINS" => { + cr_opts.weak_chains = false; + } _ => { // FIXME unused_args.push(arg.clone()); @@ -499,7 +505,6 @@ pub struct ClonoFiltOptDefault { pub fcell: Vec, // constraints from FCELL pub umi_filt_mark: bool, // umi count filter (but only mark) pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) - pub weak_chains: bool, // filter weak chains from clonotypes pub whitef: bool, // only show clonotypes exhibiting whitelist contamination pub ncross: bool, // turn off cross filtering, pub bc_dup: bool, // filter duplicated barcodes within an exact subclonotype diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index f684c500c..64e3782f4 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -43,7 +43,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "NUMI", // done "NUMI_RATIO", // done "NGRAPH_FILTER", // done - "NWEAK_CHAINS", + "NWEAK_CHAINS", // done "NFOURSIE_KILL", "NDOUBLET", "NSIG", diff --git a/enclone_stuff/src/weak_chains.rs b/enclone_stuff/src/weak_chains.rs index 98ca876f6..5cee4cf90 100644 --- a/enclone_stuff/src/weak_chains.rs +++ b/enclone_stuff/src/weak_chains.rs @@ -75,7 +75,7 @@ pub fn weak_chains( for j in 0..cols { if ncells[j] <= 20 && 8 * ncells[j] < total_cells { for d in &col_entries[j] { - if ctl.clono_filt_opt_def.weak_chains { + if ctl.cr_opt.weak_chains { res.2.push(exacts[*d]); } let ex = &exact_clonotypes[exacts[*d]]; From f8ce1ed942a2f374c44139d3833f5b649664b3bd Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:29:19 -0400 Subject: [PATCH 25/52] Move NFOURSIE_KILL into cr_opts. --- enclone_args/src/proc_args.rs | 2 -- enclone_core/src/defs.rs | 7 ++++++- enclone_ranger/src/main_enclone.rs | 2 +- enclone_stuff/src/start.rs | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index a90b46b53..34be1c7ea 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -125,7 +125,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ctl.clono_filt_opt.qual_filter = true; ctl.clono_filt_opt_def.signature = true; ctl.clono_filt_opt_def.weak_onesies = true; - ctl.clono_filt_opt_def.weak_foursies = true; ctl.clono_filt_opt_def.doublet = true; ctl.clono_filt_opt_def.bc_dup = true; ctl.clono_filt_opt.max_datasets = 1000000000; @@ -506,7 +505,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("H5_SLICE", &mut ctl.gen_opt.h5_pre), ("NBC_DUP", &mut ctl.clono_filt_opt_def.bc_dup), ("NDOUBLET", &mut ctl.clono_filt_opt_def.doublet), - ("NFOURSIE_KILL", &mut ctl.clono_filt_opt_def.weak_foursies), ("NMERGE_ONESIES", &mut ctl.join_alg_opt.merge_onesies_ctl), ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), ("NSIG", &mut ctl.clono_filt_opt_def.signature), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f5221cc99..670c91403 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -126,6 +126,8 @@ pub struct CellrangerOpt { pub umi_ratio_filt: bool, /// filter weak chains from clonotypes pub weak_chains: bool, + /// filter weak foursies + pub weak_foursies: bool, } impl Default for CellrangerOpt { @@ -143,6 +145,7 @@ impl Default for CellrangerOpt { umi_filt: true, umi_ratio_filt: true, weak_chains: true, + weak_foursies: true, } } } @@ -198,6 +201,9 @@ impl CellrangerOpt { "NWEAK_CHAINS" => { cr_opts.weak_chains = false; } + "NFOURSIE_KILL" => { + cr_opts.weak_foursies = false; + } _ => { // FIXME unused_args.push(arg.clone()); @@ -497,7 +503,6 @@ pub struct JoinAlgOpt { pub struct ClonoFiltOptDefault { pub marked_b: bool, // only print clonotypes having a mark and which are typed as B cells pub donor: bool, // allow cells from different donors to be placed in the same clonotype - pub weak_foursies: bool, // filter weak foursies pub ngex: bool, // turn off gex filtering, pub non_cell_mark: bool, pub weak_onesies: bool, // filter weak onesies diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 64e3782f4..6d4340e9e 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -44,7 +44,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "NUMI_RATIO", // done "NGRAPH_FILTER", // done "NWEAK_CHAINS", // done - "NFOURSIE_KILL", + "NFOURSIE_KILL", // done "NDOUBLET", "NSIG", "SPLIT_MAX_CHAINS", diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs index c8f8774a6..3e9e1f356 100644 --- a/enclone_stuff/src/start.rs +++ b/enclone_stuff/src/start.rs @@ -249,7 +249,7 @@ pub fn main_enclone_start( } } } - if ctl.clono_filt_opt_def.weak_foursies { + if ctl.cr_opt.weak_foursies { erase_if(&mut exact_clonotypes, &to_delete); } From 840ab1f1c1af4c71c62528fb71768d0e6b3f225c Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 20:31:24 -0400 Subject: [PATCH 26/52] Move NDOUBLET into cr_opts. --- enclone_args/src/proc_args.rs | 2 -- enclone_core/src/defs.rs | 4 +++- enclone_ranger/src/main_enclone.rs | 2 +- enclone_stuff/src/doublets.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 34be1c7ea..fd57fb038 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -125,7 +125,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ctl.clono_filt_opt.qual_filter = true; ctl.clono_filt_opt_def.signature = true; ctl.clono_filt_opt_def.weak_onesies = true; - ctl.clono_filt_opt_def.doublet = true; ctl.clono_filt_opt_def.bc_dup = true; ctl.clono_filt_opt.max_datasets = 1000000000; ctl.clono_filt_opt.max_exacts = 1_000_000_000; @@ -504,7 +503,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let mut set_false = vec![ ("H5_SLICE", &mut ctl.gen_opt.h5_pre), ("NBC_DUP", &mut ctl.clono_filt_opt_def.bc_dup), - ("NDOUBLET", &mut ctl.clono_filt_opt_def.doublet), ("NMERGE_ONESIES", &mut ctl.join_alg_opt.merge_onesies_ctl), ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), ("NSIG", &mut ctl.clono_filt_opt_def.signature), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 670c91403..5ffd70e10 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -128,6 +128,8 @@ pub struct CellrangerOpt { pub weak_chains: bool, /// filter weak foursies pub weak_foursies: bool, + /// filter putative doublets + pub doublet: bool, } impl Default for CellrangerOpt { @@ -146,6 +148,7 @@ impl Default for CellrangerOpt { umi_ratio_filt: true, weak_chains: true, weak_foursies: true, + doublet: true, } } } @@ -506,7 +509,6 @@ pub struct ClonoFiltOptDefault { pub ngex: bool, // turn off gex filtering, pub non_cell_mark: bool, pub weak_onesies: bool, // filter weak onesies - pub doublet: bool, // filter putative doublets pub fcell: Vec, // constraints from FCELL pub umi_filt_mark: bool, // umi count filter (but only mark) pub umi_ratio_filt_mark: bool, // umi ratio filter (but only mark) diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 6d4340e9e..100183d52 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -45,7 +45,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "NGRAPH_FILTER", // done "NWEAK_CHAINS", // done "NFOURSIE_KILL", // done - "NDOUBLET", + "NDOUBLET", // done "NSIG", "SPLIT_MAX_CHAINS", "NCROSS", diff --git a/enclone_stuff/src/doublets.rs b/enclone_stuff/src/doublets.rs index f11c4c10d..3e3e4b098 100644 --- a/enclone_stuff/src/doublets.rs +++ b/enclone_stuff/src/doublets.rs @@ -31,7 +31,7 @@ pub fn delete_doublets( dref: &[DonorReferenceItem], fate: &mut [BarcodeFates], ) { - if ctl.clono_filt_opt_def.doublet { + if ctl.cr_opt.doublet { // Define pure subclonotypes. To do this we break each clonotype up by chain signature. // Note duplication of code with print_clonotypes.rs. And this is doing some // superfluous compute. From fa806250172f3fdfb0c1e2d0d90beb1df1cb78b6 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 21:30:43 -0400 Subject: [PATCH 27/52] Move NSIG into cr_opts. --- enclone_args/src/proc_args.rs | 11 +---------- enclone_args/src/process_special_arg1.rs | 9 +-------- enclone_core/src/defs.rs | 10 +++++++++- enclone_ranger/src/main_enclone.rs | 2 +- enclone_stuff/src/some_filters.rs | 2 +- 5 files changed, 13 insertions(+), 21 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index fd57fb038..4e8195938 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -123,7 +123,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ctl.clono_filt_opt.min_umi = 0; ctl.clono_filt_opt.max_chains = 1000000; ctl.clono_filt_opt.qual_filter = true; - ctl.clono_filt_opt_def.signature = true; ctl.clono_filt_opt_def.weak_onesies = true; ctl.clono_filt_opt_def.bc_dup = true; ctl.clono_filt_opt.max_datasets = 1000000000; @@ -505,7 +504,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NBC_DUP", &mut ctl.clono_filt_opt_def.bc_dup), ("NMERGE_ONESIES", &mut ctl.join_alg_opt.merge_onesies_ctl), ("NQUAL", &mut ctl.clono_filt_opt.qual_filter), - ("NSIG", &mut ctl.clono_filt_opt_def.signature), ("NSILENT", &mut ctl.silent), ("NWEAK_ONESIES", &mut ctl.clono_filt_opt_def.weak_onesies), ("PRINT_FAILED_JOINS", &mut ctl.join_print_opt.quiet), @@ -888,14 +886,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String if processed[i] { continue; } - if !process_special_arg1( - &args[i], - ctl, - &mut metas, - &mut metaxs, - &mut xcrs, - &mut using_plot, - )? { + if !process_special_arg1(&args[i], ctl)? { process_special_arg2( &args[i], ctl, diff --git a/enclone_args/src/process_special_arg1.rs b/enclone_args/src/process_special_arg1.rs index 161e15327..001a632fd 100644 --- a/enclone_args/src/process_special_arg1.rs +++ b/enclone_args/src/process_special_arg1.rs @@ -19,14 +19,7 @@ use std::fs::{read_to_string, remove_file, File}; use string_utils::TextUtils; use vector_utils::{unique_sort, VecUtils}; -pub fn process_special_arg1( - arg: &str, - ctl: &mut EncloneControl, - _metas: &mut [String], - _metaxs: &mut [String], - _xcrs: &mut [String], - _using_plot: &mut bool, -) -> Result { +pub fn process_special_arg1(arg: &str, ctl: &mut EncloneControl) -> Result { // Process the argument. if is_simple_arg(arg, "SEQ")? { diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 5ffd70e10..8f2f229c6 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -130,6 +130,8 @@ pub struct CellrangerOpt { pub weak_foursies: bool, /// filter putative doublets pub doublet: bool, + /// signature filtering + pub signature: bool, } impl Default for CellrangerOpt { @@ -149,6 +151,7 @@ impl Default for CellrangerOpt { weak_chains: true, weak_foursies: true, doublet: true, + signature: true, } } } @@ -207,6 +210,12 @@ impl CellrangerOpt { "NFOURSIE_KILL" => { cr_opts.weak_foursies = false; } + "NDOUBLET" => { + cr_opts.doublet = false; + } + "NSIG" => { + cr_opts.signature = false; + } _ => { // FIXME unused_args.push(arg.clone()); @@ -515,7 +524,6 @@ pub struct ClonoFiltOptDefault { pub whitef: bool, // only show clonotypes exhibiting whitelist contamination pub ncross: bool, // turn off cross filtering, pub bc_dup: bool, // filter duplicated barcodes within an exact subclonotype - pub signature: bool, // signature filtering pub nmax: bool, // turn off max contigs filter } diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index 100183d52..a57061f8b 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -46,7 +46,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { "NWEAK_CHAINS", // done "NFOURSIE_KILL", // done "NDOUBLET", // done - "NSIG", + "NSIG", // done "SPLIT_MAX_CHAINS", "NCROSS", ]; diff --git a/enclone_stuff/src/some_filters.rs b/enclone_stuff/src/some_filters.rs index bcd0d48d7..b16752e09 100644 --- a/enclone_stuff/src/some_filters.rs +++ b/enclone_stuff/src/some_filters.rs @@ -147,7 +147,7 @@ pub fn some_filters( t.push(col); } } - if dels.contains(&t) && ctl.clono_filt_opt_def.signature { + if dels.contains(&t) && ctl.cr_opt.signature { res.2.push(exacts[u]); let ex = &exact_clonotypes[exacts[u]]; for i in 0..ex.ncells() { From 786eebc8f3bf91acd103569673361a985e161344 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Tue, 16 Apr 2024 21:52:26 -0400 Subject: [PATCH 28/52] Move META into cr_opts. --- enclone_args/src/proc_args.rs | 16 +++------------- enclone_args/src/proc_args_post.rs | 7 +++---- enclone_args/src/process_special_arg2.rs | 10 +--------- enclone_core/src/defs.rs | 11 +++++++++++ enclone_ranger/src/main_enclone.rs | 2 +- 5 files changed, 19 insertions(+), 27 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 4e8195938..2a8913895 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -169,7 +169,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let mut have_meta = false; let mut gex = String::new(); let mut bc = String::new(); - let mut metas = Vec::::new(); let mut metaxs = Vec::::new(); let mut xcrs = Vec::::new(); for i in 1..args.len() { @@ -183,7 +182,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String have_bcr = true; } else if args[i].starts_with("GEX=") { have_gex = true; - } else if args[i].starts_with("META=") || args[i].starts_with("METAX=") { + } else if !ctl.cr_opt.metas.is_empty() || args[i].starts_with("METAX=") { have_meta = true; } if args[i].starts_with("GEX=") { @@ -887,14 +886,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String continue; } if !process_special_arg1(&args[i], ctl)? { - process_special_arg2( - &args[i], - ctl, - &mut metas, - &mut metaxs, - &mut xcrs, - &mut using_plot, - )?; + process_special_arg2(&args[i], ctl, &mut metaxs, &mut xcrs, &mut using_plot)?; } } @@ -913,8 +905,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String .to_string(), ); } - proc_args_post( - ctl, &args, &metas, &metaxs, &xcrs, have_gex, &gex, &bc, using_plot, - )?; + proc_args_post(ctl, &args, &metaxs, &xcrs, have_gex, &gex, &bc, using_plot)?; Ok(()) } diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 1b10d8e41..65dd33dce 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -147,7 +147,6 @@ fn parse_bc_joint(ctl: &mut EncloneControl) -> Result<(), String> { pub fn proc_args_post( ctl: &mut EncloneControl, args: &[String], - metas: &[String], metaxs: &[String], xcrs: &[String], have_gex: bool, @@ -491,9 +490,9 @@ pub fn proc_args_post( // Process TCR, BCR and META. check_cvars(ctl)?; - if !metas.is_empty() { - let mut v = Vec::::with_capacity(metas.len()); - for meta in metas { + if !ctl.cr_opt.metas.is_empty() { + let mut v = Vec::::with_capacity(ctl.cr_opt.metas.len()); + for meta in &ctl.cr_opt.metas { let f = get_path_fail(meta, ctl, "META")?; if f.contains('/') { let d = f.rev_before("/").to_string(); diff --git a/enclone_args/src/process_special_arg2.rs b/enclone_args/src/process_special_arg2.rs index 6ebf1c9f6..d1eef2a5a 100644 --- a/enclone_args/src/process_special_arg2.rs +++ b/enclone_args/src/process_special_arg2.rs @@ -5,7 +5,7 @@ use crate::proc_args2::{is_f64_arg, is_usize_arg}; use enclone_core::defs::{EncloneControl, GeneScanOpts}; use enclone_core::linear_condition::LinearCondition; -use enclone_core::{require_readable_file, tilde_expand_me}; +use enclone_core::require_readable_file; use evalexpr::build_operator_tree; use io_utils::open_for_read; use regex::Regex; @@ -16,7 +16,6 @@ use vector_utils::unique_sort; pub fn process_special_arg2( arg: &str, ctl: &mut EncloneControl, - metas: &mut Vec, metaxs: &mut Vec, xcrs: &mut Vec, using_plot: &mut bool, @@ -665,13 +664,6 @@ pub fn process_special_arg2( } else if is_usize_arg(arg, "CELLS")? { ctl.clono_filt_opt.ncells_low = arg.after("CELLS=").force_usize(); ctl.clono_filt_opt.ncells_high = ctl.clono_filt_opt.ncells_low; - } else if arg.starts_with("META=") { - let v = arg.after("META=").split(','); - for f in v { - let mut f = f.to_string(); - tilde_expand_me(&mut f); - metas.push(f); - } } else if arg.starts_with("METAX=") { let f = arg.after("METAX="); let f = f.chars().filter(|c| !c.is_whitespace()).collect(); diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 8f2f229c6..7dc75e786 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -2,6 +2,7 @@ use crate::cell_color::CellColor; use crate::linear_condition::LinearCondition; +use crate::tilde_expand_me; use anyhow::Result; use debruijn::dna_string::DnaString; use evalexpr::Node; @@ -103,6 +104,8 @@ pub struct CellrangerOpt { pub pre: Vec, /// Path to reference. pub refname: String, + /// Paths to optional metadata files. + pub metas: Vec, /// Path to donor reference output file. pub dref_file: String, @@ -140,6 +143,7 @@ impl Default for CellrangerOpt { cellranger: Default::default(), pre: Default::default(), refname: Default::default(), + metas: Default::default(), dref_file: Default::default(), proto: Default::default(), proto_metadata: Default::default(), @@ -180,6 +184,13 @@ impl CellrangerOpt { "REF" => { cr_opts.refname = get_rest(); } + "META" => { + for meta in get_rest().split(',') { + let mut f = meta.to_string(); + tilde_expand_me(&mut f); + cr_opts.metas.push(f); + } + } "DONOR_REF_FILE" => { cr_opts.dref_file = get_rest(); } diff --git a/enclone_ranger/src/main_enclone.rs b/enclone_ranger/src/main_enclone.rs index a57061f8b..a562add03 100644 --- a/enclone_ranger/src/main_enclone.rs +++ b/enclone_ranger/src/main_enclone.rs @@ -34,7 +34,7 @@ pub fn main_enclone_ranger(args: Vec) -> anyhow::Result<()> { ]; const ALLOWED_ARGS: [&str; 16] = [ "BCR", - "META", + "META", // done "PROTO_METADATA", // done "TCR", "TCRGD", From f6c0a9a77df9bb13b2b73690b8477ac413b448c7 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Wed, 17 Apr 2024 10:36:26 -0700 Subject: [PATCH 29/52] Tweak docstring. --- enclone_args/src/proc_args_post.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 65dd33dce..64872111a 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -487,7 +487,7 @@ pub fn proc_args_post( ); } - // Process TCR, BCR and META. + // Process TCR, BCR and metas. check_cvars(ctl)?; if !ctl.cr_opt.metas.is_empty() { From e4a7e868c07cc8cbd157fd2e32f971de6af0f8d7 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 12:35:24 -0700 Subject: [PATCH 30/52] Delete unused cr_version param. --- enclone_core/src/defs.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 7dc75e786..e8503c32b 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -301,7 +301,6 @@ pub struct GeneralOpt { pub summary: bool, pub summary_clean: bool, pub summary_csv: bool, - pub cr_version: String, pub nwarn: bool, pub gene_scan: Option, pub gene_scan_exact: bool, From cbcca5ebc7a22441b282f41863dd7fa9a20e5430 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 12:40:23 -0700 Subject: [PATCH 31/52] Remove dead WEAK option. --- enclone/src/UNDOC_OPTIONS | 1 - enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 1 - 3 files changed, 3 deletions(-) diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 01fd44fb3..39f9824ff 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -57,7 +57,6 @@ Other optional arguments: - FORCE: make joins even if redundant - EXP: exploratory code for exact clonotyping on -- WEAK: for EXP, print all and show weaks - GRAPH: show logging from light-heavy graph construction - UTR_CON: run experimental UTR consensus code - CON_CON: run experimental constant region consensus code diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 2a8913895..1836714e2 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -492,7 +492,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("UTR_CON", &mut ctl.gen_opt.utr_con), ("VDUP", &mut ctl.clono_filt_opt.vdup), ("VIS_DUMP", &mut ctl.gen_opt.vis_dump), - ("WEAK", &mut ctl.gen_opt.weak), ("WHITEF", &mut ctl.clono_filt_opt_def.whitef), ]; diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index e8503c32b..faddf04c6 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -262,7 +262,6 @@ pub struct GeneralOpt { pub utr_con: bool, pub con_con: bool, pub nwhitef: bool, - pub weak: bool, pub tcr: bool, pub bcr: bool, pub tcrgd: bool, From 93d0a0fd574685b985a0b8d1c77ff462d423f350 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 12:41:37 -0700 Subject: [PATCH 32/52] Remove unused EXP argument. --- enclone/src/UNDOC_OPTIONS | 1 - 1 file changed, 1 deletion(-) diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 39f9824ff..11c3431a0 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -56,7 +56,6 @@ Optional arguments that control printing of individual clonotypes: Other optional arguments: - FORCE: make joins even if redundant -- EXP: exploratory code for exact clonotyping on - GRAPH: show logging from light-heavy graph construction - UTR_CON: run experimental UTR consensus code - CON_CON: run experimental constant region consensus code From 6e8cc48d592ffa285cdfb14e96cbfd428047d96e Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 12:50:07 -0700 Subject: [PATCH 33/52] Remove never-read extc param. --- enclone_core/src/defs.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index faddf04c6..09c56b82c 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -283,7 +283,6 @@ pub struct GeneralOpt { pub accept_reuse: bool, pub descrip: bool, pub ext: String, - pub extc: HashMap<(String, String), String>, pub extn: HashMap, pub mouse: bool, pub noprint: bool, From cca0fc193a3ff5e7417c18ccfbab3a2a9333e1a2 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 12:53:28 -0700 Subject: [PATCH 34/52] Remove never-completed EXT functionality. --- enclone/src/UNDOC_OPTIONS | 7 ------- enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 2 -- 3 files changed, 10 deletions(-) diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 11c3431a0..5b3d466fb 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -80,13 +80,6 @@ expanded out. CELLRANGER: for use if called from cellranger -- changes failure message and prevents exit upon normal completion -EXT=filename: -Given output of an external clonotyping algorithm which took as inputs the pipeline outputs -for the lenas in enclone.testdata, for each exact subclonotype found by enclone, report its -composition in the external clonotyping, as clonotype_id[count], ... -The input file should have lines of the form: -sample barcode clonotype_id. - SUMMARY_CLEAN: if SUMMARY specified, don't show computational performance stats, so we can regress on output diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 1836714e2..9d9606dde 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -574,7 +574,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ), ("CLUSTAL_AA", &mut ctl.gen_opt.clustal_aa), ("CLUSTAL_DNA", &mut ctl.gen_opt.clustal_dna), - ("EXT", &mut ctl.gen_opt.ext), ("GROUP_CDR3", &mut ctl.clono_group_opt.cdr3), ("PCHAINS", &mut ctl.parseable_opt.pchains), ("SESSION_NAME", &mut ctl.gen_opt.session_name), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 09c56b82c..991cad6a6 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -282,8 +282,6 @@ pub struct GeneralOpt { pub h5_pre: bool, pub accept_reuse: bool, pub descrip: bool, - pub ext: String, - pub extn: HashMap, pub mouse: bool, pub noprint: bool, pub noprintx: bool, From b99ee83e76af049161e081958b542586ca7761ba Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:03:26 -0700 Subject: [PATCH 35/52] Delete dead FB_SHOW arg. --- enclone_args/src/process_special_arg1.rs | 28 ------------------------ enclone_core/src/defs.rs | 1 - 2 files changed, 29 deletions(-) diff --git a/enclone_args/src/process_special_arg1.rs b/enclone_args/src/process_special_arg1.rs index 001a632fd..9ae5710dd 100644 --- a/enclone_args/src/process_special_arg1.rs +++ b/enclone_args/src/process_special_arg1.rs @@ -332,34 +332,6 @@ pub fn process_special_arg1(arg: &str, ctl: &mut EncloneControl) -> Result Result").to_string(), xj.after("->").to_string()); } - } else if arg.starts_with("PLOT2=") { - *using_plot = true; - let x = arg.after("PLOT2=").split(',').collect::>(); - if x.is_empty() { - return Err("\nArgument to PLOT is invalid.\n".to_string()); - } - if x.len() % 2 != 1 { - return Err("\nArgument to PLOT is invalid.\n".to_string()); - } - ctl.plot_opt.plot_file = x[0].to_string(); - for j in (1..x.len()).step_by(2) { - let condition = x[j].to_string(); - let color = x[j + 1].to_string(); - if !condition.contains('=') { - return Err("\nArgument to PLOT is invalid.\n".to_string()); - } - ctl.plot_opt.plot_colors.push(color); - } } else if arg.starts_with("PLOT_BY_ISOTYPE=") { ctl.plot_opt.plot_by_isotype = true; ctl.plot_opt.plot_file = arg.after("PLOT_BY_ISOTYPE=").to_string(); diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 93efdfd0a..13a2b2e9a 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -423,7 +423,6 @@ pub struct PlotOpt { pub plot_xy_x_log10: bool, pub plot_xy_y_log10: bool, pub plot_xy_sym: bool, - pub plot_colors: Vec, pub plot_file: String, pub plot_by_isotype: bool, pub plot_by_isotype_nolegend: bool, From 5ed609927277475014affe0b5427d112f1e8a6ca Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:14:53 -0700 Subject: [PATCH 39/52] Delete unused fields from AlleleData. --- enclone_core/src/defs.rs | 2 -- enclone_stuff/src/start.rs | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 13a2b2e9a..21ec91e7f 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -461,8 +461,6 @@ pub struct AllelePrintOpt { #[derive(Clone, Default)] pub struct AlleleData { pub alt_refs: Vec<(usize, usize, DnaString, usize, bool)>, - pub var_pos: Vec>, - pub var_bases: Vec>>, } // Join printing options. diff --git a/enclone_stuff/src/start.rs b/enclone_stuff/src/start.rs index 3e9e1f356..1823d42f3 100644 --- a/enclone_stuff/src/start.rs +++ b/enclone_stuff/src/start.rs @@ -880,11 +880,7 @@ pub fn main_enclone_start( join_info, drefs, sr, - allele_data: AlleleData { - alt_refs, - var_pos: Vec::new(), - var_bases: Vec::new(), - }, + allele_data: AlleleData { alt_refs }, }, fate, )) From 653db37e4056f28efee0e5bc78f16f529543bd7e Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:16:20 -0700 Subject: [PATCH 40/52] Delete unused JoinAlgOpt field. --- enclone_core/src/defs.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 21ec91e7f..f60c549cf 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -494,7 +494,6 @@ pub struct JoinAlgOpt { pub basicx: bool, pub join_full_diff: bool, pub join_cdr3_ident: f64, - pub join_cdr12h_ident: f64, pub fwr1_cdr12_delta: f64, pub cdr3_normal_len: usize, pub auto_share: usize, From 028b066831774899166994682a3ffcb8ab8bdbb7 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:18:15 -0700 Subject: [PATCH 41/52] Delete always-false/dead fails_only param and branch. --- enclone_core/src/defs.rs | 1 - enclone_process/src/filter.rs | 3 --- 2 files changed, 4 deletions(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f60c549cf..f4ea511bc 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -539,7 +539,6 @@ pub struct ClonoFiltOpt { pub cdr3: Option, // only show clonotypes whose CDR3_AA matches regular expression pub cdr3_lev: String, // only show clonotypes whose CDR3_AA matches Levenshtein dist pattern pub protect_bads: bool, // protect bads from deletion - pub fail_only: bool, // only print fails pub seg: Vec>, // only show clonotypes using one of these VDJ segment names pub segn: Vec>, // only show clonotypes using one of these VDJ segment numbers pub nseg: Vec>, // do not show clonotypes using one of these VDJ segment names diff --git a/enclone_process/src/filter.rs b/enclone_process/src/filter.rs index 72a2b4bce..155a0b3de 100644 --- a/enclone_process/src/filter.rs +++ b/enclone_process/src/filter.rs @@ -511,9 +511,6 @@ pub fn survives_filter( }) .collect::>(); unique_sort(&mut donors); - if ctl.clono_filt_opt.fail_only && donors.len() <= 1 { - return false; - } // Inconsistent D genes. From 5a4f93245cfb802da8577b66f5e16f4009cb7c62 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:21:16 -0700 Subject: [PATCH 42/52] Delete dead chain_brief option. --- enclone/src/UNDOC_OPTIONS | 1 - enclone_args/src/proc_args.rs | 1 - enclone_core/src/defs.rs | 1 - 3 files changed, 3 deletions(-) diff --git a/enclone/src/UNDOC_OPTIONS b/enclone/src/UNDOC_OPTIONS index 5b3d466fb..0e450927c 100644 --- a/enclone/src/UNDOC_OPTIONS +++ b/enclone/src/UNDOC_OPTIONS @@ -48,7 +48,6 @@ Optional arguments governing input and output files: Optional arguments that control printing of individual clonotypes: - white = percent of sequences implicated in whitelist expansion. -- CHAIN_BRIEF: show abbreviated chain column headers - DEBUG_TABLE_PRINTING: add print lines to help debug printing of tables. - NOTE_SIMPLE: note if the first sequence for the chain is simple, in the sense that it exactly equals the concatenation of the right-truncated V with the full J segment. diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 67b23e083..89fae1baa 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -386,7 +386,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("BCJOIN", &mut ctl.join_alg_opt.bcjoin), ("BUILT_IN", &mut ctl.gen_opt.built_in), ("CDIFF", &mut ctl.clono_filt_opt.cdiff), - ("CHAIN_BRIEF", &mut ctl.clono_print_opt.chain_brief), ("COMPLETE", &mut ctl.gen_opt.complete), ("CON", &mut ctl.allele_print_opt.con), ("CON_CON", &mut ctl.gen_opt.con_con), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f4ea511bc..ea386c20c 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -578,7 +578,6 @@ pub struct ClonoPrintOpt { pub cvars: Vec, // per-chain per-exact-clonotype columns pub lvars: Vec, // per-exact-clonotype ('lead') columns pub regex_match: Vec>>, // matching features for _g etc. - pub chain_brief: bool, // show abbreviated chain headers pub sum: bool, // print sum row pub mean: bool, // print mean row pub conx: bool, From 7bd3de7c9347088d7ed455cdb9e1916ab145b4a6 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:28:03 -0700 Subject: [PATCH 43/52] Remove unused AG_DIST_FORMULA parameter. --- enclone_args/src/proc_args.rs | 4 ---- enclone_args/src/proc_args_post.rs | 11 ++--------- enclone_core/src/defs.rs | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 89fae1baa..7ba63f9a9 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -567,10 +567,6 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String "AG_DIST_BOUND", &mut ctl.clono_group_opt.asymmetric_dist_bound, ), - ( - "AG_DIST_FORMULA", - &mut ctl.clono_group_opt.asymmetric_dist_formula, - ), ("CLUSTAL_AA", &mut ctl.gen_opt.clustal_aa), ("CLUSTAL_DNA", &mut ctl.gen_opt.clustal_dna), ("GROUP_CDR3", &mut ctl.clono_group_opt.cdr3), diff --git a/enclone_args/src/proc_args_post.rs b/enclone_args/src/proc_args_post.rs index 64872111a..000af447a 100644 --- a/enclone_args/src/proc_args_post.rs +++ b/enclone_args/src/proc_args_post.rs @@ -327,23 +327,21 @@ pub fn proc_args_post( if ctl.clono_group_opt.style == "asymmetric" && (ctl.clono_group_opt.asymmetric_center.is_empty() - || ctl.clono_group_opt.asymmetric_dist_formula.is_empty() || ctl.clono_group_opt.asymmetric_dist_bound.is_empty()) { return Err( "\nIf the AGROUP option is used to specify asymmetric grouping, then all\n\ - of the options AG_CENTER, AG_DIST_FORMULA and AG_DIST_BOUND must also be \ + of the options AG_CENTER and AG_DIST_BOUND must also be \ specified.\n" .to_string(), ); } if (!ctl.clono_group_opt.asymmetric_center.is_empty() - || !ctl.clono_group_opt.asymmetric_dist_formula.is_empty() || !ctl.clono_group_opt.asymmetric_dist_bound.is_empty()) && ctl.clono_group_opt.style == "symmetric" { return Err("\nIf any of the asymmetric grouping options AG_CENTER or \ - AG_DIST_FORMULA or\nAG_DIST_BOUND are specified, then the option AGROUP \ + AG_DIST_BOUND are specified, then the option AGROUP \ must also be specified, to turn on asymmetric grouping.\n" .to_string()); } @@ -357,11 +355,6 @@ pub fn proc_args_post( .to_string(), ); } - if ctl.clono_group_opt.asymmetric_dist_formula != "cdr3_edit_distance" { - return Err( - "\nThe only allowed form for AG_DIST_FORMULA is cdr3_edit_distance.\n".to_string(), - ); - } let ok1 = ctl .clono_group_opt .asymmetric_dist_bound diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index ea386c20c..b8a18e904 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -620,7 +620,6 @@ pub struct ClonoGroupOpt { // ASYMMETRIC GROUPING CONTROLS pub asymmetric_center: String, // definition of center for asymmetric grouping - pub asymmetric_dist_formula: String, // definition of distance formula for asymmetric grouping pub asymmetric_dist_bound: String, // definition of distance bound for asymmetric grouping // DEPRECATED pub vj_refname_strong: bool, // group by having the same VJ reference names, but stronger From c15f9b078ba0ef76c13056dba8c215886d76f3b9 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:29:10 -0700 Subject: [PATCH 44/52] Remove dead vj_refname_strong parameter. --- enclone_core/src/defs.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index b8a18e904..7f5dc2d2c 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -621,8 +621,6 @@ pub struct ClonoGroupOpt { // ASYMMETRIC GROUPING CONTROLS pub asymmetric_center: String, // definition of center for asymmetric grouping pub asymmetric_dist_bound: String, // definition of distance bound for asymmetric grouping - // DEPRECATED - pub vj_refname_strong: bool, // group by having the same VJ reference names, but stronger } // Parseable output options. From 34c79dd1a3687f043e95918cae4ff0437657fdd2 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:39:37 -0700 Subject: [PATCH 45/52] Move nogray into clono_print_opts. --- enclone_args/src/proc_args.rs | 2 +- enclone_core/src/defs.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 7ba63f9a9..09b55cc81 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -432,7 +432,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("NCROSS", &mut ctl.clono_filt_opt_def.ncross), ("NEWICK", &mut ctl.gen_opt.newick), ("NGEX", &mut ctl.clono_filt_opt_def.ngex), - ("NOGRAY", &mut ctl.nogray), + ("NOGRAY", &mut ctl.clono_print_opt.nogray), ("NGROUP", &mut ctl.clono_group_opt.ngroup), ("NIMPROPER", &mut ctl.merge_all_impropers), ("NMAX", &mut ctl.clono_filt_opt_def.nmax), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 7f5dc2d2c..ec723d5dc 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -582,6 +582,8 @@ pub struct ClonoPrintOpt { pub mean: bool, // print mean row pub conx: bool, pub conp: bool, + /// don't gray in per cell lines + pub nogray: bool, } // Clonotype grouping options. @@ -653,8 +655,6 @@ pub struct EncloneControl { pub plot_opt: PlotOpt, /// use escape characters to enhance view pub pretty: bool, - /// don't gray in per cell lines - pub nogray: bool, /// turn off extra logging pub silent: bool, /// make joins even if redundant From aaec17b1b376bfce5e55cb7401b63ed39f3a221d Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:42:31 -0700 Subject: [PATCH 46/52] Move more printing options into ClonoPrintOpt. --- enclone_args/src/proc_args.rs | 5 ++++- enclone_core/src/defs.rs | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 09b55cc81..9eeea7f3e 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -393,7 +393,10 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String ("CONP", &mut ctl.clono_print_opt.conp), ("CONX", &mut ctl.clono_print_opt.conx), ("CURRENT_REF", &mut ctl.gen_opt.current_ref), - ("DEBUG_TABLE_PRINTING", &mut ctl.debug_table_printing), + ( + "DEBUG_TABLE_PRINTING", + &mut ctl.clono_print_opt.debug_table_printing, + ), ("DEL", &mut ctl.clono_filt_opt.del), ("DESCRIP", &mut ctl.gen_opt.descrip), ("D_INCONSISTENT", &mut ctl.clono_filt_opt.d_inconsistent), diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index ec723d5dc..f79b3d1a2 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -584,6 +584,8 @@ pub struct ClonoPrintOpt { pub conp: bool, /// don't gray in per cell lines pub nogray: bool, + /// turn on debugging for table printing + pub debug_table_printing: bool, } // Clonotype grouping options. @@ -659,8 +661,6 @@ pub struct EncloneControl { pub silent: bool, /// make joins even if redundant pub force: bool, - /// turn on debugging for table printing - pub debug_table_printing: bool, /// merge all improper exact subclonotypes pub merge_all_impropers: bool, /// algorithmic heuristics From a9033accda79b628456c3b87adb3b44447fe52f1 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:46:42 -0700 Subject: [PATCH 47/52] Remove never-read pathlist and last_modified params. --- enclone_args/src/load_gex_core.rs | 3 --- enclone_core/src/defs.rs | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index 995bce271..d986cbc88 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -556,9 +556,6 @@ pub fn load_gex( } unique_sort(&mut r.gex_cell_barcodes); }); - for (_, r) in &results { - ctl.pathlist.extend(r.f15.iter().cloned()); - } // Test for error. diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f79b3d1a2..d10fc49fe 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -14,7 +14,7 @@ use std::cmp::max; use std::collections::HashMap; use std::sync::atomic::AtomicBool; -use std::time::{Instant, SystemTime}; +use std::time::Instant; use vdj_ann::annotate::Annotation; use vector_utils::unique_sort; @@ -685,10 +685,6 @@ pub struct EncloneControl { pub clono_group_opt: ClonoGroupOpt, /// parseable output options pub parseable_opt: ParseableOpt, - /// list of input files - pub pathlist: Vec, - /// last modified for pathlist - pub last_modified: Vec, } // Set up data structure to track clonotype data. A TigData is for one contig; From 9f5584fc5cd0bc5845ffc76d566d377038aa277e Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:50:56 -0700 Subject: [PATCH 48/52] Remove all code that populated dead pathlist. --- enclone_args/src/load_gex_core.rs | 17 +++++------------ enclone_args/src/load_gex_util.rs | 17 +++++------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index d986cbc88..755d1cc97 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -30,7 +30,6 @@ struct LoadResult { cell_type_specified: bool, error: String, h5_path: String, - f15: Vec, feature_metrics: HashMap<(String, String), String>, json_metrics: HashMap, metrics: String, @@ -70,7 +69,6 @@ pub fn load_gex( // somehow the parallelism is not working. // 2. We know where the time is spent in the loop, and this is marked below. results.par_iter_mut().for_each(|(i, r)| { - let pathlist = &mut r.f15; let i = *i; if !gex_outs[i].is_empty() { // First define the path where the GEX files should live, and make sure that the path @@ -102,7 +100,6 @@ pub fn load_gex( for x in &h5p { let p = format!("{outs}/{x}"); if path_exists(&p) { - pathlist.push(p.clone()); h5_path = p; break; } @@ -141,13 +138,13 @@ pub fn load_gex( // Find files. - let pca_file = find_pca_file(&analysis, pathlist); - let cluster_file = find_cluster_file(&analysis, pathlist); + let pca_file = find_pca_file(&analysis); + let cluster_file = find_cluster_file(&analysis); let (json_metrics_file, feature_metrics_file, metrics_file) = if !ctl.cr_opt.cellranger {( - find_json_metrics_file(&analysis, pathlist), - find_feature_metrics_file(&analysis, pathlist), - find_metrics_file(&outs, pathlist) + find_json_metrics_file(&analysis), + find_feature_metrics_file(&analysis), + find_metrics_file(&outs) )} else { Default::default() }; @@ -173,7 +170,6 @@ pub fn load_gex( ); return; } - pathlist.push(f.to_string()); } // Find metrics summary file. @@ -195,7 +191,6 @@ pub fn load_gex( for c in &csvs { if path_exists(c) { csv = c.clone(); - pathlist.push(c.to_string()); break; } } @@ -211,7 +206,6 @@ pub fn load_gex( // Read cell types. if path_exists(&types_file) { - pathlist.push(types_file.clone()); let f = open_userfile_for_read(&types_file); let mut count = 0; for line in f.lines() { @@ -540,7 +534,6 @@ pub fn load_gex( let fref_file = fnx(&outs, "feature_reference.csv"); if path_exists(&fref_file) { - pathlist.push(fref_file.clone()); r.feature_refs = read_to_string(&fref_file).unwrap(); } diff --git a/enclone_args/src/load_gex_util.rs b/enclone_args/src/load_gex_util.rs index d94fdbd38..4269549a7 100644 --- a/enclone_args/src/load_gex_util.rs +++ b/enclone_args/src/load_gex_util.rs @@ -3,30 +3,27 @@ use io_utils::{dir_list, path_exists}; use vector_utils::VecUtils; -pub fn find_pca_file(analysis: &[String], pathlist: &mut Vec) -> String { +pub fn find_pca_file(analysis: &[String]) -> String { let mut pca_file = String::new(); for x in analysis { pca_file = format!("{x}/pca/10_components/projection.csv"); if path_exists(&pca_file) { - pathlist.push(pca_file.clone()); break; } pca_file = format!("{x}/pca/gene_expression_10_components/projection.csv"); if path_exists(&pca_file) { - pathlist.push(pca_file.clone()); break; } } pca_file } -pub fn find_json_metrics_file(analysis: &[String], pathlist: &mut Vec) -> String { +pub fn find_json_metrics_file(analysis: &[String]) -> String { let mut json_metrics_file = String::new(); for x in analysis { let f = format!("{x}/metrics_summary_json.json"); if path_exists(&f) { json_metrics_file = f.clone(); - pathlist.push(f); break; } } @@ -34,13 +31,12 @@ pub fn find_json_metrics_file(analysis: &[String], pathlist: &mut Vec) - json_metrics_file } -pub fn find_feature_metrics_file(analysis: &[String], pathlist: &mut Vec) -> String { +pub fn find_feature_metrics_file(analysis: &[String]) -> String { let mut feature_metrics_file = String::new(); for x in analysis { let f = format!("{x}/per_feature_metrics.csv"); if path_exists(&f) { feature_metrics_file = f.clone(); - pathlist.push(f); break; } } @@ -48,14 +44,13 @@ pub fn find_feature_metrics_file(analysis: &[String], pathlist: &mut Vec feature_metrics_file } -pub fn find_metrics_file(outs: &str, pathlist: &mut Vec) -> String { +pub fn find_metrics_file(outs: &str) -> String { let mut metrics_file = String::new(); let summary_dir = format!("{outs}/../multi_web_summary_json/metrics_summary_csv"); if path_exists(&summary_dir) { let list = dir_list(&summary_dir); if list.solo() { let path = format!("{summary_dir}/{}", list[0]); - pathlist.push(path.clone()); metrics_file = path; } } @@ -63,17 +58,15 @@ pub fn find_metrics_file(outs: &str, pathlist: &mut Vec) -> String { metrics_file } -pub fn find_cluster_file(analysis: &[String], pathlist: &mut Vec) -> String { +pub fn find_cluster_file(analysis: &[String]) -> String { let mut cluster_file = String::new(); for x in analysis { cluster_file = format!("{x}/clustering/graphclust/clusters.csv"); if path_exists(&cluster_file) { - pathlist.push(cluster_file.clone()); break; } cluster_file = format!("{x}/clustering/gene_expression_graphclust/clusters.csv"); if path_exists(&cluster_file) { - pathlist.push(cluster_file.clone()); break; } } From 319e8a81684acbe8209cc6b2bc38ca4ffef3af66 Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 13:57:03 -0700 Subject: [PATCH 49/52] Remove unused loading/storing of feature_refs. --- enclone_args/src/lib.rs | 10 ---------- enclone_args/src/load_gex.rs | 3 --- enclone_args/src/load_gex_core.rs | 14 ++------------ enclone_core/src/defs.rs | 1 - 4 files changed, 2 insertions(+), 26 deletions(-) diff --git a/enclone_args/src/lib.rs b/enclone_args/src/lib.rs index 45cf34085..842cc7fdf 100644 --- a/enclone_args/src/lib.rs +++ b/enclone_args/src/lib.rs @@ -1,8 +1,6 @@ // Copyright (c) 2021 10x Genomics, Inc. All rights reserved. #![allow(clippy::needless_range_loop)] -use io_utils::path_exists; - pub mod load_gex; pub mod load_gex_core; pub mod load_gex_util; @@ -41,11 +39,3 @@ pub fn parse_csv_pure(x: &str) -> Vec<&str> { } y } - -pub fn fnx(outs: &str, name: &str) -> String { - let mut file = format!("{outs}/../{name}"); - if !path_exists(&file) { - file = format!("{outs}/{name}"); - } - file -} diff --git a/enclone_args/src/load_gex.rs b/enclone_args/src/load_gex.rs index ad4370ac6..bf552e999 100644 --- a/enclone_args/src/load_gex.rs +++ b/enclone_args/src/load_gex.rs @@ -19,7 +19,6 @@ use vector_utils::{bin_position, unique_sort}; pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { let mut gex_features = Vec::>::new(); let mut gex_barcodes = Vec::>::new(); - let mut feature_refs = Vec::::new(); let mut cluster = Vec::>::new(); let mut cell_type = Vec::>::new(); let mut cell_type_specified = Vec::::new(); @@ -37,7 +36,6 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { ctl, &mut gex_features, &mut gex_barcodes, - &mut feature_refs, &mut cluster, &mut cell_type, &mut cell_type_specified, @@ -137,7 +135,6 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { Ok(GexInfo { gex_features, gex_barcodes, - feature_refs, cluster, cell_type, cell_type_specified, diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index 755d1cc97..e2289b539 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -6,14 +6,14 @@ use crate::load_gex_util::{ find_cluster_file, find_feature_metrics_file, find_json_metrics_file, find_metrics_file, find_pca_file, }; -use crate::{fnx, parse_csv_pure}; +use crate::parse_csv_pure; use enclone_core::defs::EncloneControl; use enclone_core::slurp::slurp_h5; use io_utils::{dir_list, open_for_read, open_userfile_for_read, path_exists}; use itertools::Itertools; use rayon::prelude::*; use serde_json::Value; -use std::{collections::HashMap, fmt::Write, fs::read_to_string, io::BufRead}; +use std::{collections::HashMap, fmt::Write, io::BufRead}; use string_utils::{parse_csv, TextUtils}; use vector_utils::{unique_sort, VecUtils}; @@ -33,14 +33,12 @@ struct LoadResult { feature_metrics: HashMap<(String, String), String>, json_metrics: HashMap, metrics: String, - feature_refs: String, } pub fn load_gex( ctl: &mut EncloneControl, gex_features: &mut Vec>, gex_barcodes: &mut Vec>, - feature_refs: &mut Vec, cluster: &mut Vec>, cell_type: &mut Vec>, cell_type_specified: &mut Vec, @@ -530,13 +528,6 @@ pub fn load_gex( r.gex_mult = gene_mult; r.fb_mult = fb_mult; - // Read the feature reference file. - - let fref_file = fnx(&outs, "feature_reference.csv"); - if path_exists(&fref_file) { - r.feature_refs = read_to_string(&fref_file).unwrap(); - } - // Read the feature barcode matrix file. if let Err(err) = slurp_h5( &h5_path, @@ -619,7 +610,6 @@ pub fn load_gex( feature_metrics.push(r.feature_metrics); json_metrics.push(r.json_metrics); metrics.push(r.metrics); - feature_refs.push(r.feature_refs); } // Done. diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index d10fc49fe..91509584b 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -907,7 +907,6 @@ pub struct CloneInfo { pub struct GexInfo { pub gex_features: Vec>, pub gex_barcodes: Vec>, - pub feature_refs: Vec, pub gex_cell_barcodes: Vec>, pub cluster: Vec>, pub cell_type: Vec>, From ec6e76aa00ef2a5d3176e62447480e93ff265bbb Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 14:07:53 -0700 Subject: [PATCH 50/52] Rip out entirely-unused metrics loading code. --- enclone_args/src/lib.rs | 27 ----------- enclone_args/src/load_gex.rs | 3 -- enclone_args/src/load_gex_core.rs | 81 ++----------------------------- enclone_args/src/load_gex_util.rs | 17 +------ enclone_core/src/defs.rs | 1 - 5 files changed, 4 insertions(+), 125 deletions(-) diff --git a/enclone_args/src/lib.rs b/enclone_args/src/lib.rs index 842cc7fdf..8c6460b31 100644 --- a/enclone_args/src/lib.rs +++ b/enclone_args/src/lib.rs @@ -12,30 +12,3 @@ pub mod proc_args_post; pub mod process_special_arg1; pub mod process_special_arg2; pub mod read_json; - -// parse_csv_pure: same as parse_csv, but don't strip out quotes - -pub fn parse_csv_pure(x: &str) -> Vec<&str> { - let w = x.char_indices().collect::>(); - let mut y = Vec::new(); - let (mut quotes, mut i) = (0, 0); - while i < w.len() { - let mut j = i; - while j < w.len() { - if quotes % 2 == 0 && w[j].1 == ',' { - break; - } - if w[j].1 == '"' { - quotes += 1; - } - j += 1; - } - let (start, stop) = (w[i].0, w.get(j).map_or(x.len(), |(ind, _)| *ind)); - y.push(&x[start..stop]); - i = j + 1; - } - if !w.is_empty() && w.last().unwrap().1 == ',' { - y.push(""); - } - y -} diff --git a/enclone_args/src/load_gex.rs b/enclone_args/src/load_gex.rs index bf552e999..b6c2f0123 100644 --- a/enclone_args/src/load_gex.rs +++ b/enclone_args/src/load_gex.rs @@ -31,7 +31,6 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { let mut h5_paths = Vec::::new(); let mut feature_metrics = Vec::>::new(); let mut json_metrics = Vec::>::new(); - let mut metrics = Vec::::new(); load_gex( ctl, &mut gex_features, @@ -48,7 +47,6 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { &mut h5_paths, &mut feature_metrics, &mut json_metrics, - &mut metrics, )?; if ctl.gen_opt.gene_scan.is_some() && !ctl.gen_opt.accept_inconsistent { let mut allf = gex_features.clone(); @@ -151,6 +149,5 @@ pub fn get_gex_info(ctl: &mut EncloneControl) -> Result { have_fb, feature_metrics, json_metrics, - metrics, }) } diff --git a/enclone_args/src/load_gex_core.rs b/enclone_args/src/load_gex_core.rs index e2289b539..6ee47b239 100644 --- a/enclone_args/src/load_gex_core.rs +++ b/enclone_args/src/load_gex_core.rs @@ -3,17 +3,14 @@ // Load gene expression and feature barcoding (antibody, antigen) data from Cell Ranger outputs. use crate::load_gex_util::{ - find_cluster_file, find_feature_metrics_file, find_json_metrics_file, find_metrics_file, - find_pca_file, + find_cluster_file, find_feature_metrics_file, find_json_metrics_file, find_pca_file, }; -use crate::parse_csv_pure; use enclone_core::defs::EncloneControl; use enclone_core::slurp::slurp_h5; use io_utils::{dir_list, open_for_read, open_userfile_for_read, path_exists}; -use itertools::Itertools; use rayon::prelude::*; use serde_json::Value; -use std::{collections::HashMap, fmt::Write, io::BufRead}; +use std::{collections::HashMap, io::BufRead}; use string_utils::{parse_csv, TextUtils}; use vector_utils::{unique_sort, VecUtils}; @@ -32,7 +29,6 @@ struct LoadResult { h5_path: String, feature_metrics: HashMap<(String, String), String>, json_metrics: HashMap, - metrics: String, } pub fn load_gex( @@ -51,7 +47,6 @@ pub fn load_gex( h5_paths: &mut Vec, feature_metrics: &mut Vec>, json_metrics: &mut Vec>, - metrics: &mut Vec, ) -> Result<(), String> { let mut results = Vec::<(usize, LoadResult)>::new(); for i in 0..ctl.origin_info.gex_path.len() { @@ -139,10 +134,9 @@ pub fn load_gex( let pca_file = find_pca_file(&analysis); let cluster_file = find_cluster_file(&analysis); - let (json_metrics_file, feature_metrics_file, metrics_file) = if !ctl.cr_opt.cellranger {( + let (json_metrics_file, feature_metrics_file) = if !ctl.cr_opt.cellranger {( find_json_metrics_file(&analysis), find_feature_metrics_file(&analysis), - find_metrics_file(&outs) )} else { Default::default() }; @@ -245,38 +239,6 @@ pub fn load_gex( } } - // Read and parse metrics file. Rewrite as metrics class, metric name, metric value. - - if !metrics_file.is_empty() { - let m = std::fs::read_to_string(&metrics_file).unwrap(); - let fields = parse_csv_pure(m.before("\n")); - let (mut class, mut name, mut value) = (None, None, None); - for field in fields { - if field == "Library Type" { - class = Some(i); - } else if field == "Metric Name" { - name = Some(i); - } else if field == "Metric Value" { - value = Some(i); - } - } - let (class, name, value) = (class.unwrap(), name.unwrap(), value.unwrap()); - let mut lines = Vec::::new(); - let mut first = true; - for line in m.lines() { - if first { - first = false; - } else { - let fields = parse_csv_pure(line); - lines.push(format!( - "{},{},{}", - fields[class], fields[name], fields[value] - )); - } - } - r.metrics = format!("{}\n", lines.iter().format("\n")); - } - // Read feature metrics file. Note that we do not enforce the requirement of this // file, so it may not be present. @@ -561,42 +523,6 @@ pub fn load_gex( } h5_paths.extend(results.iter().map(|(_, r)| r.h5_path.clone())); - // Add some metrics. - - let extras = [ - ( - "ANTIBODY_G_perfect_homopolymer_frac", - "Antibody Capture,G Homopolymer Frac", - ), - ( - "GRCh38_raw_rpc_20000_subsampled_filtered_bcs_median_unique_genes_detected", - "Gene Expression,GRCh38 Median genes per cell (20k raw reads per cell)", - ), - ( - "GRCh38_raw_rpc_20000_subsampled_filtered_bcs_median_counts", - "Gene Expression,GRCh38 Median UMI counts per cell (20k raw reads per cell)", - ), - ]; - for x in &extras { - let metric_name = x.0.to_string(); - let metric_display_name = x.1.to_string(); - let mut have = false; - for (_, result) in &results { - if result.json_metrics.contains_key(&metric_name) { - have = true; - } - } - if have { - for (_, result) in &mut results { - let mut value = String::new(); - if result.json_metrics.contains_key(&metric_name) { - value = format!("{:.3}", result.json_metrics[&metric_name]); - } - writeln!(result.metrics, "{metric_display_name},{value}").unwrap(); - } - } - } - for (_, r) in results { gex_features.push(r.gex_features); gex_barcodes.push(r.gex_barcodes); @@ -609,7 +535,6 @@ pub fn load_gex( cell_type_specified.push(r.cell_type_specified); feature_metrics.push(r.feature_metrics); json_metrics.push(r.json_metrics); - metrics.push(r.metrics); } // Done. diff --git a/enclone_args/src/load_gex_util.rs b/enclone_args/src/load_gex_util.rs index 4269549a7..e7abc2a54 100644 --- a/enclone_args/src/load_gex_util.rs +++ b/enclone_args/src/load_gex_util.rs @@ -1,7 +1,6 @@ // Copyright (c) 2022 10X Genomics, Inc. All rights reserved. -use io_utils::{dir_list, path_exists}; -use vector_utils::VecUtils; +use io_utils::path_exists; pub fn find_pca_file(analysis: &[String]) -> String { let mut pca_file = String::new(); @@ -44,20 +43,6 @@ pub fn find_feature_metrics_file(analysis: &[String]) -> String { feature_metrics_file } -pub fn find_metrics_file(outs: &str) -> String { - let mut metrics_file = String::new(); - let summary_dir = format!("{outs}/../multi_web_summary_json/metrics_summary_csv"); - if path_exists(&summary_dir) { - let list = dir_list(&summary_dir); - if list.solo() { - let path = format!("{summary_dir}/{}", list[0]); - metrics_file = path; - } - } - - metrics_file -} - pub fn find_cluster_file(analysis: &[String]) -> String { let mut cluster_file = String::new(); for x in analysis { diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index 91509584b..f73b54145 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -923,7 +923,6 @@ pub struct GexInfo { pub have_fb: bool, pub feature_metrics: Vec>, pub json_metrics: Vec>, - pub metrics: Vec, } // Every entry in a ColInfo is a vector whose number of entries is the number of chains From 7c4df1f3ec79e6274d9ac608e717ba833b3ec34c Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 14:53:57 -0700 Subject: [PATCH 51/52] Add validation for readability/writability. --- enclone_args/src/proc_args.rs | 6 +-- enclone_args/src/proc_args2.rs | 35 ---------------- enclone_args/src/process_special_arg1.rs | 3 +- enclone_core/src/defs.rs | 51 ++++++++++++------------ enclone_core/src/lib.rs | 40 +++++++++++++++++-- 5 files changed, 65 insertions(+), 70 deletions(-) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 9eeea7f3e..82f7198fc 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -1,14 +1,12 @@ // Copyright (c) 2021 10X Genomics, Inc. All rights reserved. -use crate::proc_args2::{ - is_f64_arg, is_i32_arg, is_simple_arg, is_string_arg, is_usize_arg, test_writeable, -}; +use crate::proc_args2::{is_f64_arg, is_i32_arg, is_simple_arg, is_string_arg, is_usize_arg}; use crate::proc_args_post::proc_args_post; use crate::process_special_arg1::process_special_arg1; use crate::process_special_arg2::process_special_arg2; use enclone_core::defs::{ClonotypeHeuristics, EncloneControl}; use enclone_core::test_def::replace_at_test; -use enclone_core::{require_readable_file, tilde_expand_me}; +use enclone_core::{require_readable_file, test_writeable, tilde_expand_me}; use itertools::Itertools; use std::fmt::Write; use std::process::Command; diff --git a/enclone_args/src/proc_args2.rs b/enclone_args/src/proc_args2.rs index 6a92c0ff9..ae06c2253 100644 --- a/enclone_args/src/proc_args2.rs +++ b/enclone_args/src/proc_args2.rs @@ -3,47 +3,12 @@ use enclone_core::defs::EncloneControl; use io_utils::{open_userfile_for_read, path_exists}; use rayon::prelude::*; -use std::fmt::Write; -use std::fs::{remove_file, File}; use std::io::BufRead; use string_utils::TextUtils; use vector_utils::next_diff; // ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ -// Test a file for writeability by writing and then deleting it. - -pub fn test_writeable(val: &str, evil_eye: bool) -> Result<(), String> { - if evil_eye { - println!("creating file {val} to test writability"); - } - let f = File::create(val); - if f.is_err() { - let mut msgx = - format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n"); - if val.contains('/') { - let dir = val.rev_before("/"); - let msg = if path_exists(dir) { - "exists" - } else { - "does not exist" - }; - writeln!(msgx, "Note that the path {dir} {msg}.").unwrap(); - } - return Err(msgx); - } - if evil_eye { - println!("removing file {val}"); - } - remove_file(val).unwrap_or_else(|_| panic!("could not remove file {val}")); - if evil_eye { - println!("removal of file {val} complete"); - } - Ok(()) -} - -// ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ - // Simple arguments. We test for e.g. PLAIN or PLAIN=, the latter to allow for the case // where the argument has been set by an environment variable. diff --git a/enclone_args/src/process_special_arg1.rs b/enclone_args/src/process_special_arg1.rs index d69e33786..04280d6b0 100644 --- a/enclone_args/src/process_special_arg1.rs +++ b/enclone_args/src/process_special_arg1.rs @@ -2,13 +2,12 @@ // Process a special argument, i.e. one that does not fit into a neat bucket. -use crate::proc_args2::test_writeable; use crate::proc_args2::{is_simple_arg, is_usize_arg}; use enclone_core::cell_color::{ CellColor, ColorByCategoricalVariableValue, ColorByDataset, ColorByVariableValue, }; use enclone_core::defs::EncloneControl; -use enclone_core::tilde_expand_me; +use enclone_core::{test_writeable, tilde_expand_me}; use enclone_vars::encode_arith; use evalexpr::build_operator_tree; use expr_tools::test_functions_in_node; diff --git a/enclone_core/src/defs.rs b/enclone_core/src/defs.rs index f73b54145..2ccc9d813 100644 --- a/enclone_core/src/defs.rs +++ b/enclone_core/src/defs.rs @@ -2,8 +2,8 @@ use crate::cell_color::CellColor; use crate::linear_condition::LinearCondition; -use crate::tilde_expand_me; -use anyhow::Result; +use crate::{require_readable_file, test_writeable, tilde_expand_me}; +use anyhow::{anyhow, ensure, Result}; use debruijn::dna_string::DnaString; use evalexpr::Node; use hdf5::Dataset; @@ -171,37 +171,37 @@ impl CellrangerOpt { let arg_name = pieces.next().unwrap(); let mut get_rest = || { let result = pieces.join("="); - assert!(!result.is_empty(), "no value provided for {arg_name}"); - result + ensure!(!result.is_empty(), "no value provided for {arg_name}"); + Ok(result) }; match arg_name { "CELLRANGER" => { cr_opts.cellranger = true; } "PRE" => { - cr_opts.pre = get_rest().split(',').map(str::to_string).collect(); + cr_opts.pre = get_rest()?.split(',').map(str::to_string).collect(); } "REF" => { - cr_opts.refname = get_rest(); + cr_opts.refname = ensure_readable_file(get_rest()?, arg_name)?; } "META" => { - for meta in get_rest().split(',') { + for meta in get_rest()?.split(',') { let mut f = meta.to_string(); tilde_expand_me(&mut f); cr_opts.metas.push(f); } } "DONOR_REF_FILE" => { - cr_opts.dref_file = get_rest(); + cr_opts.dref_file = ensure_writable_file(get_rest()?)?; } "PROTO" => { - cr_opts.proto = get_rest(); + cr_opts.proto = ensure_writable_file(get_rest()?)?; } "PROTO_METADATA" => { - cr_opts.proto_metadata = get_rest(); + cr_opts.proto_metadata = ensure_readable_file(get_rest()?, arg_name)?; } "FATE_FILE" => { - cr_opts.fate_file = get_rest(); + cr_opts.fate_file = ensure_writable_file(get_rest()?)?; } "GAMMA_DELTA" => { cr_opts.gamma_delta = true; @@ -228,27 +228,28 @@ impl CellrangerOpt { cr_opts.signature = false; } _ => { - // FIXME unused_args.push(arg.clone()); } } } Ok((cr_opts, unused_args)) } +} - /// Validate parsed options. - pub fn validate(&self) -> Result<()> { - if !self.dref_file.is_empty() { - // TODO: test writability - } - if !self.fate_file.is_empty() { - // TODO: test writability - } - if !self.refname.is_empty() { - // TODO: check readability - } - Ok(()) - } +/// Ensure that a path points to a readable file. +/// Expand ~ into home directories. +fn ensure_readable_file(mut path: String, arg: &str) -> Result { + tilde_expand_me(&mut path); + require_readable_file(&path, arg).map_err(|e| anyhow!(e))?; + Ok(path) +} + +/// Ensure that we can write to a path by writing an empty file there. +/// Expand ~ into home directories. +fn ensure_writable_file(mut path: String) -> Result { + tilde_expand_me(&mut path); + test_writeable(&path, false).map_err(|e| anyhow!(e))?; + Ok(path) } // Miscellaneous general options. diff --git a/enclone_core/src/lib.rs b/enclone_core/src/lib.rs index 04ed6399b..48e519c2d 100644 --- a/enclone_core/src/lib.rs +++ b/enclone_core/src/lib.rs @@ -20,9 +20,12 @@ pub mod stringulate; pub mod test_def; pub mod var_reg; +use io_utils::path_exists; use std::cmp::max; use std::fmt::Write; +use std::fs::{remove_file, File}; use std::io::BufRead; +use string_utils::TextUtils; #[cfg(not(target_os = "windows"))] use string_utils::stringme; @@ -133,10 +136,9 @@ pub fn parse_bsv(x: &str) -> Vec<&str> { args } -// Test to see if a line can be read from the given file f. If not, return an error message -// the references arg, which is supposed to be the name of a command line argument from which -// f originated. - +/// Test to see if a line can be read from the given file f. If not, return an error message +/// the references arg, which is supposed to be the name of a command line argument from which +/// f originated. pub fn require_readable_file(f: &str, arg: &str) -> Result<(), String> { let x = std::fs::File::open(f); if x.is_err() { @@ -163,3 +165,33 @@ pub fn require_readable_file(f: &str, arg: &str) -> Result<(), String> { } Ok(()) } + +/// Test a file for writeability by writing and then deleting it. +pub fn test_writeable(val: &str, evil_eye: bool) -> Result<(), String> { + if evil_eye { + println!("creating file {val} to test writability"); + } + let f = File::create(val); + if f.is_err() { + let mut msgx = + format!("\nYou've specified an output file\n{val}\nthat cannot be written.\n"); + if val.contains('/') { + let dir = val.rev_before("/"); + let msg = if path_exists(dir) { + "exists" + } else { + "does not exist" + }; + writeln!(msgx, "Note that the path {dir} {msg}.").unwrap(); + } + return Err(msgx); + } + if evil_eye { + println!("removing file {val}"); + } + remove_file(val).unwrap_or_else(|_| panic!("could not remove file {val}")); + if evil_eye { + println!("removal of file {val} complete"); + } + Ok(()) +} From 1f4f0d7a6a0ce67190e64f8dd63146cb8c87cd2a Mon Sep 17 00:00:00 2001 From: Chris Macklin Date: Thu, 18 Apr 2024 15:06:18 -0700 Subject: [PATCH 52/52] Restore handling of FORCE_EXTERNAL. --- enclone_args/src/proc_args.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/enclone_args/src/proc_args.rs b/enclone_args/src/proc_args.rs index 82f7198fc..34aea000f 100644 --- a/enclone_args/src/proc_args.rs +++ b/enclone_args/src/proc_args.rs @@ -616,6 +616,7 @@ pub fn proc_args(ctl: &mut EncloneControl, args: &[String]) -> Result<(), String let set_nothing_simple = [ "DUMP_INTERNAL_IDS", "EVIL_EYE", + "FORCE_EXTERNAL", "LONG_HELP", "MARKED_B", "MARK_STATS",