From 5f46a2fdb86119372097d403af58cc1cab822f6f Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Wed, 16 Oct 2024 10:01:54 +0200 Subject: [PATCH 1/2] Fix bug with empty aggregated_unassigned_indexes --- taca/element/Element_Runs.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/taca/element/Element_Runs.py b/taca/element/Element_Runs.py index 02ca8810..afeb2777 100644 --- a/taca/element/Element_Runs.py +++ b/taca/element/Element_Runs.py @@ -1174,10 +1174,11 @@ def aggregate_stats_unassigned( ) # Write to a new UnassignedSequences.csv file under demux_dir - aggregated_unassigned_csv = os.path.join( - self.run_dir, self.demux_dir, "UnassignedSequences.csv" - ) - self.write_to_csv(aggregated_unassigned_indexes, aggregated_unassigned_csv) + if aggregated_unassigned_indexes: + aggregated_unassigned_csv = os.path.join( + self.run_dir, self.demux_dir, "UnassignedSequences.csv" + ) + self.write_to_csv(aggregated_unassigned_indexes, aggregated_unassigned_csv) # Aggregate demux results def aggregate_demux_results(self, demux_results_dirs): From 340de287301c7a7b5e1dbe86cd7016cfdafba30e Mon Sep 17 00:00:00 2001 From: chuan-wang Date: Wed, 16 Oct 2024 10:36:54 +0200 Subject: [PATCH 2/2] Fix wrong logic for collecting unassigned indexes --- VERSIONLOG.md | 4 ++++ taca/element/Element_Runs.py | 30 ++++++++---------------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/VERSIONLOG.md b/VERSIONLOG.md index 2b5fed5c..2e6d992b 100644 --- a/VERSIONLOG.md +++ b/VERSIONLOG.md @@ -1,5 +1,9 @@ # TACA Version Log +## 20241016.1 + +Fix wrong logic for collecting unassigned indexes + ## 20241011.1 Fix issue with 0 lane number; Add percentage of unassigned in total unassigned per lane diff --git a/taca/element/Element_Runs.py b/taca/element/Element_Runs.py index afeb2777..fb2ce2b2 100644 --- a/taca/element/Element_Runs.py +++ b/taca/element/Element_Runs.py @@ -1040,25 +1040,11 @@ def aggregate_stats_unassigned( # Order: from longer to shorter indexes sub_demux_with_shorter_index_lens = sub_demux_list[1:] for sub_demux in sub_demux_with_shorter_index_lens: - unassigned_csv = os.path.join( - self.run_dir, - f"Demultiplexing_{sub_demux}", - "UnassignedSequences.csv", - ) - if os.path.exists(unassigned_csv): - with open(unassigned_csv) as unassigned_file: - reader = csv.DictReader(unassigned_file) - unassigned_indexes = [row for row in reader] - else: - logger.warning( - f"No {os.path.basename(unassigned_csv)} file found for sub-demultiplexing {sub_demux}." - ) - continue - # Filter by lane - unassigned_indexes = [ - unassigned_index - for unassigned_index in unassigned_indexes - if unassigned_index["Lane"] == lane + sub_demux_assigned_indexes = [ + sub_demux_assigned_index + for sub_demux_assigned_index in aggregated_assigned_indexes_filtered_sorted + if sub_demux_assigned_index["sub_demux_count"] == sub_demux + and sub_demux_assigned_index["Lane"] == lane ] # Remove overlapped indexes from the list of max_unassigned_indexes idx1_overlapped_len = min( @@ -1085,11 +1071,11 @@ def aggregate_stats_unassigned( if demux_lens_pair[0] == sub_demux_with_max_index_lens ][0][1], ) - for unassigned_index in unassigned_indexes: - idx1_overlapped_seq = unassigned_index["I1"][ + for sub_demux_assigned_index in sub_demux_assigned_indexes: + idx1_overlapped_seq = sub_demux_assigned_index["I1"][ :idx1_overlapped_len ] - idx2_overlapped_seq = unassigned_index["I2"][ + idx2_overlapped_seq = sub_demux_assigned_index["I2"][ :idx2_overlapped_len ] # Remove the overlapped record from the max_unassigned_indexes list