Skip to content

Commit

Permalink
Merge pull request #1395 from mskcc/master
Browse files Browse the repository at this point in the history
master to develop
  • Loading branch information
sivkovic authored Dec 26, 2024
2 parents f90d421 + 1fa4931 commit 6ea0d1d
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 14 deletions.
2 changes: 1 addition & 1 deletion beagle/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.89.0"
__version__ = "1.89.1"
3 changes: 2 additions & 1 deletion runner/operator/access/heme/nucleo/heme_nucleo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections import defaultdict

from runner.operator.operator import Operator
from runner.operator.helper import pair_samples
from runner.run.objects.run_creator_object import RunCreator
from file_system.repository.file_repository import FileRepository

Expand Down Expand Up @@ -85,7 +86,7 @@ def construct_sample_inputs(samples, request_id):
sample_group = list(sample_group)
sample_id = sample_group[0]["metadata"][settings.CMO_SAMPLE_NAME_METADATA_KEY]

fgbio_fastq_to_bam_input = group_by_run(sample_group)
fgbio_fastq_to_bam_input = pair_samples(sample_group)
fgbio_fastq_to_bam_input = [
[
{"class": "File", "location": "juno://" + s[0]["path"]},
Expand Down
9 changes: 2 additions & 7 deletions runner/operator/access/v2_1_0/nucleo/access_nucleo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from collections import defaultdict

from runner.operator.operator import Operator
from runner.operator.helper import pair_samples
from runner.run.objects.run_creator_object import RunCreator
from file_system.repository.file_repository import FileRepository

Expand Down Expand Up @@ -45,12 +46,6 @@ def group_by_sample_id(samples):
return sample_pairs


def group_by_run(samples):
samples.sort(key=lambda s: s["path"].split("/")[-1])
fastqs = zip(samples[::2], samples[1::2])
return list(fastqs)


def calc_avg(sample_files, field):
samples_with_field = list(filter(lambda s: field in s["metadata"] and s["metadata"][field], sample_files))
field_count = len(samples_with_field)
Expand Down Expand Up @@ -85,7 +80,7 @@ def construct_sample_inputs(samples, request_id):
sample_group = list(sample_group)
sample_id = sample_group[0]["metadata"][settings.CMO_SAMPLE_NAME_METADATA_KEY]

fgbio_fastq_to_bam_input = group_by_run(sample_group)
fgbio_fastq_to_bam_input = pair_samples(sample_group)
fgbio_fastq_to_bam_input = [
[
{"class": "File", "location": "juno://" + s[0]["path"]},
Expand Down
38 changes: 33 additions & 5 deletions runner/operator/cmo_ch/v2_1_0/nucleo/nucleo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from collections import defaultdict

from runner.operator.operator import Operator
from runner.operator.helper import pair_samples
from runner.run.objects.run_creator_object import RunCreator
from file_system.repository.file_repository import FileRepository
from django.conf import settings
Expand Down Expand Up @@ -42,10 +43,37 @@ def group_by_sample_id(samples):
return sample_pairs


def group_by_run(samples):
samples.sort(key=lambda s: s["path"].split("/")[-1])
fastqs = zip(samples[::2], samples[1::2])
return list(fastqs)
def pair_samples(fastqs):
"""
pair sample fastqs based on the delivery directory.
Parameters:
fastqs (list): A list of sample fastq files.
Returns:
list: A list of tuples containing paired fastqs for a sample
"""
sample_pairs = []
expected_pair = set(["R1", "R2"])
# match R1 and R2 based on delivery directory
# sorting on file names is not enough as they are non-unique
for i, fastq in enumerate(fastqs):
dir = "/".join(fastq["path"].split("_R")[0:-1])
for compare in fastqs[i + 1 :]:
compare_dir = "/".join(compare["path"].split("_R")[0:-1])
if dir == compare_dir:
# check if R1 and R2 are present
r_check = set([fastq["metadata"]["R"], compare["metadata"]["R"]])
if r_check.issubset(expected_pair):
# Keep ordering consistent
if fastq["metadata"]["R"] == "R1":
sample_pairs.append((fastq, compare))
else:
sample_pairs.append((compare, fastq))
else:
sample_name = fastq["metadata"]["cmoSampleName"]
raise Exception(f"Improper pairing for: {sample_name}")
return sample_pairs


def calc_avg(sample_files, field):
Expand Down Expand Up @@ -81,7 +109,7 @@ def construct_sample_inputs(samples, request_id):
sample_group = list(sample_group)
sample_id = sample_group[0]["metadata"][settings.CMO_SAMPLE_NAME_METADATA_KEY]

fgbio_fastq_to_bam_input = group_by_run(sample_group)
fgbio_fastq_to_bam_input = pair_samples(sample_group)
fgbio_fastq_to_bam_input = [
[
{"class": "File", "location": "juno://" + s[0]["path"]},
Expand Down
33 changes: 33 additions & 0 deletions runner/operator/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,36 @@ def init_metadata():
metadata["runId"] = ""
metadata["preservation"] = ""
return metadata


def pair_samples(fastqs):
"""
pair sample fastqs based on the delivery directory.
Parameters:
fastqs (list): A list of sample fastq files.
Returns:
list: A list of tuples containing paired fastqs for a sample
"""
sample_pairs = []
expected_pair = set(["R1", "R2"])
# match R1 and R2 based on delivery directory
# sorting on file names is not enough as they are non-unique
for i, fastq in enumerate(fastqs):
dir = "/".join(fastq["path"].split("_R")[0:-1])
for compare in fastqs[i + 1 :]:
compare_dir = "/".join(compare["path"].split("_R")[0:-1])
if dir == compare_dir:
# check if R1 and R2 are present
r_check = set([fastq["metadata"]["R"], compare["metadata"]["R"]])
if r_check.issubset(expected_pair):
# Keep ordering consistent
if fastq["metadata"]["R"] == "R1":
sample_pairs.append((fastq, compare))
else:
sample_pairs.append((compare, fastq))
else:
sample_name = fastq["metadata"]["cmoSampleName"]
raise Exception(f"Improper pairing for: {sample_name}")
return sample_pairs
1 change: 1 addition & 0 deletions runner/run/objects/run_creator_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def create(self):
try:
run.job_group = JobGroup.objects.get(id=self.job_group_id)
except JobGroup.DoesNotExist:
run.job_group = JobGroup.objects.create()
print("[JobGroup] %s" % self.job_group_id)
try:
run.job_group_notifier = JobGroupNotifier.objects.get(id=self.job_group_notifier_id)
Expand Down

0 comments on commit 6ea0d1d

Please sign in to comment.