Skip to content

Commit

Permalink
Merge pull request #18 from chuan-wang/dev
Browse files Browse the repository at this point in the history
Fix issue with 0 lane number; Add percentage in total unassigned
  • Loading branch information
ssjunnebo authored Oct 14, 2024
2 parents d0c402a + 9b1a495 commit 2eff071
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 5 deletions.
4 changes: 4 additions & 0 deletions VERSIONLOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# TACA Version Log

## 20241011.1

Fix issue with lane numbers reported as 0 in the run stats; add each unassigned index's percentage of the total unassigned polonies per lane

## 20241008.1

Add support for processing Element Aviti data
Expand Down
62 changes: 57 additions & 5 deletions taca/element/Element_Runs.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,8 +986,12 @@ def aggregate_stats_assigned(self, demux_runmanifest):
aggregated_assigned_indexes_filtered_sorted, aggregated_assigned_indexes_csv
)

return aggregated_assigned_indexes_filtered_sorted

# Aggregate stats in UnassignedSequences.csv
def aggregate_stats_unassigned(self, demux_runmanifest):
def aggregate_stats_unassigned(
self, demux_runmanifest, aggregated_assigned_indexes_filtered_sorted
):
aggregated_unassigned_indexes = []
lanes = sorted(list(set(sample["Lane"] for sample in demux_runmanifest)))
for lane in lanes:
Expand Down Expand Up @@ -1105,12 +1109,42 @@ def aggregate_stats_unassigned(self, demux_runmanifest):
aggregated_unassigned_indexes, key=lambda x: (x["Lane"], -int(x["Count"]))
)
# Fetch PFCount for each lane
# to calculate % of unassigned index in total lane PF polonies
pfcount_lane = {}
if os.path.exists(self.run_stats_file):
with open(self.run_stats_file) as stats_json:
aviti_runstats_json = json.load(stats_json)
for lane_stats in aviti_runstats_json["LaneStats"]:
pfcount_lane[str(lane_stats["Lane"])] = float(lane_stats["PFCount"])
# Check whether the lane numbers match between the run stat json and run manifests
if len(aviti_runstats_json["LaneStats"]) != len(lanes):
logger.warning(
f"Inconsistent lane numbers between the {os.path.basename(self.run_stats_file)} file and run manifests!"
)
else:
# When there is no RunManifest uploaded at the sequencer, the lane numbers will all be 0
# In this case we assume that the lanes are ordered by their numbers
if all(
lane_stats["Lane"] == 0
for lane_stats in aviti_runstats_json["LaneStats"]
):
lane_counter = 1
for lane_stats in aviti_runstats_json["LaneStats"]:
pfcount_lane[str(lane_counter)] = float(lane_stats["PFCount"])
lane_counter += 1
# Otherwise we parse the PF counts by matching the lane numbers
else:
for lane_stats in aviti_runstats_json["LaneStats"]:
pfcount_lane[str(lane_stats["Lane"])] = float(
lane_stats["PFCount"]
)
# Prepare the dict for pf assigned count for each lane
pf_assigned_lane = {}
for sample in aggregated_assigned_indexes_filtered_sorted:
lane = sample["Lane"]
num_polonies_assigned = int(sample["NumPoloniesAssigned"])
if lane in pf_assigned_lane:
pf_assigned_lane[lane] += num_polonies_assigned
else:
pf_assigned_lane[lane] = num_polonies_assigned
# Modify the % Polonies values based on PFCount for each lane
for unassigned_index in aggregated_unassigned_indexes:
if pfcount_lane.get(unassigned_index["Lane"]):
Expand All @@ -1119,6 +1153,20 @@ def aggregate_stats_unassigned(self, demux_runmanifest):
/ pfcount_lane[unassigned_index["Lane"]]
* 100
)
# Calculate the % Polonies values in the total unassigned for each lane
if pf_assigned_lane.get(unassigned_index["Lane"]):
unassigned_index["% Unassigned"] = (
float(unassigned_index["Count"])
/ (
pfcount_lane[unassigned_index["Lane"]]
- pf_assigned_lane[unassigned_index["Lane"]]
)
* 100
)
else:
unassigned_index["% Unassigned"] = 0
else:
unassigned_index["% Polonies"] = 0
else:
logger.warning(
f"No {os.path.basename(self.run_stats_file)} file found for the run."
Expand All @@ -1143,9 +1191,13 @@ def aggregate_demux_results(self, demux_results_dirs):
# Symlink the output FastQ files of undet only if a lane does not have multiple demux
self.aggregate_undet_fastq(demux_runmanifest)
# Aggregate stats in IndexAssignment.csv
self.aggregate_stats_assigned(demux_runmanifest)
aggregated_assigned_indexes_filtered_sorted = self.aggregate_stats_assigned(
demux_runmanifest
)
# Aggregate stats in UnassignedSequences.csv
self.aggregate_stats_unassigned(demux_runmanifest)
self.aggregate_stats_unassigned(
demux_runmanifest, aggregated_assigned_indexes_filtered_sorted
)

def sync_metadata(self):
files_to_copy = [
Expand Down

0 comments on commit 2eff071

Please sign in to comment.