Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Operator to generate inputs for sample sheets #848

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
11 changes: 11 additions & 0 deletions beagle_etl/fixtures/beagle_etl.operator.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,16 @@
"version": "v1.0.0",
"slug": "AccessLegacyCNVOperator"
}
},
{
"model": "beagle_etl.operator",
"pk": 10,
"fields": {
"active": true,
"recipes": "[\"None\"]",
"class_name": "runner.operator.access.v1_0_0.sample_sheet.AccessSampleSheetOperator",
"version": "v1.0.0",
"slug": "AccessSampleSheetOperator"
}
}
]
10 changes: 10 additions & 0 deletions fixtures/tests/10075_D.filemetadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -93,6 +94,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": "10075_D_4_1_1_1",
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -168,6 +170,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_2",
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -244,6 +247,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -320,6 +324,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_4",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": "DUAL_IDT_LIB_255",
"libraryId": null,
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -395,6 +400,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_2",
"cmoSampleName": "s_C_0CREWW_L009_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -471,6 +477,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L008_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -547,6 +554,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L007_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
Expand Down Expand Up @@ -622,6 +630,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L006_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": "10075_D_1_1_1_1",
"patientId": "C-DRKHP7",
Expand Down Expand Up @@ -697,6 +706,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_1",
"cmoSampleName": "s_C_0CREWW_L005_d",
"barcodeId": "DUAL_IDT_LIB_243",
"libraryId": null,
"patientId": "C-DRKHP7",
Expand Down
8 changes: 8 additions & 0 deletions fixtures/tests/10075_D_single_TN_pair.filemetadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L013_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": "10075_D_5_1_1_1",
"patientId": "C-8VK0V7",
Expand All @@ -31,6 +32,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "[email protected]",
"oncoTreeCode": "MEL",
Expand Down Expand Up @@ -95,6 +97,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_5",
"cmoSampleName": "s_C_0CREWW_L012_d",
"barcodeId": "DUAL_IDT_LIB_267",
"libraryId": null,
"patientId": "C-8VK0V7",
Expand All @@ -107,6 +110,7 @@
"captureName": "Pool-09483_R-10075_D-Tube7_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": "GTATTGGC-TTGTCGGT",
"labHeadEmail": "[email protected]",
"oncoTreeCode": "MEL",
Expand Down Expand Up @@ -171,6 +175,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L011_d",
"barcodeId": null,
"libraryId": "10075_D_3",
"patientId": "C-8VK0V7",
Expand All @@ -183,6 +188,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "[email protected]",
"oncoTreeCode": null,
Expand Down Expand Up @@ -248,6 +254,7 @@
"runMode": "HiSeq High Output",
"species": "Human",
"sampleId": "10075_D_3",
"cmoSampleName": "s_C_0CREWW_L010_d",
"barcodeId": null,
"libraryId": null,
"patientId": "C-8VK0V7",
Expand All @@ -260,6 +267,7 @@
"captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
"igocomplete": true,
"labHeadName": "John Smith",
"dnaInputNg": 12.0,
"barcodeIndex": null,
"labHeadEmail": "[email protected]",
"oncoTreeCode": null,
Expand Down
17 changes: 17 additions & 0 deletions runner/fixtures/runner.pipeline.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,5 +171,22 @@
"operator": 9,
"default": true
}
},
{
"model": "runner.pipeline",
"pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbffb",
"fields": {
"created_date": "2019-11-18T17:46:45.118Z",
"modified_date": "2019-12-05T01:12:39.854Z",
"name": "sample sheet",
"github": "git@github.com:mskcc/ACCESS-Pipeline",
"version": "ij/output_bam_files_instead_of_directory",
"entrypoint": "cwl_tools/sample_sheet/sample_sheet.cwl",
"output_file_group": "a975f490-1b02-4575-abae-a4f8e3667733",
"output_directory": "/work/access/production/runs/voyager/sample_sheets",
"operator": 10,
"default": true
}
}

]
64 changes: 64 additions & 0 deletions runner/operator/access/v1_0_0/sample_sheet/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import os
import logging

from file_system.repository.file_repository import FileRepository
from runner.operator.operator import Operator
from runner.serializers import APIRunCreateSerializer

logger = logging.getLogger(__name__)

class AccessSampleSheetOperator(Operator):
    """
    Operator that gathers one row per (file, flow cell lane) for a request and
    hands all rows, as a single ``samples`` input, to the pipeline returned by
    ``self.get_pipeline_id()``.
    """

    def get_jobs(self):
        """
        Convert job inputs into serialized jobs

        Files are restricted to those of ``self.request_id`` whose metadata has
        ``igocomplete`` set to True. Every lane listed in a file's
        ``flowCellLanes`` metadata produces one sample row.

        :return: list[(serialized job info, Job)]
        """
        files = FileRepository.filter(queryset=self.files,
                                      metadata={'requestId': self.request_id,
                                                'igocomplete': True})

        samples = []
        # Initialised up front so that a first file without a barcodeIndex
        # produces rows with Index=None instead of raising UnboundLocalError.
        # NOTE(review): a later file without a barcodeIndex still inherits the
        # index of the last file that had one; the existing operator test
        # expects that carry-over -- confirm whether it is intended.
        index1 = None
        for f in files:
            metadata = f.metadata

            if metadata["barcodeIndex"]:
                # Only the part before the first "-" is used as the index.
                index1 = metadata["barcodeIndex"].split("-")[0]

            for lane in metadata["flowCellLanes"]:
                samples.append({
                    "Lane": lane,
                    # Drop the first two characters of the CMO sample name and
                    # use dashes instead of underscores.
                    "SampleID": metadata["cmoSampleName"][2:].replace("_", "-"),
                    "SampleRef": metadata["species"],
                    "Index": index1,
                    "Description": metadata["tumorOrNormal"],
                    "Control": "N",
                    "Recipe": metadata["recipe"],
                    "Operator": "|".join(["AR", "-;-", metadata["sampleId"], metadata["sex"], "NOVASEQ"]),
                    "SampleProject": "Project_" + self.request_id,
                    "DnaInputNg": metadata["dnaInputNg"],
                    "CaptureInputNg": metadata["captureInputNg"],
                    "LibraryVolume": metadata["libraryVolume"],
                    "PatientID": metadata["patientId"],
                    "IgoID": metadata["sampleId"],
                })

        # All rows go into one job; the list form keeps the "i of n" naming
        # below generic.
        inputs = [{
            "samples": samples
        }]

        return [
            (
                APIRunCreateSerializer(
                    data={
                        'name': "Sample Sheet: %s, %i of %i" % (self.request_id, i + 1, len(inputs)),
                        'app': self.get_pipeline_id(),
                        'inputs': job,
                        'tags': {
                            'requestId': self.request_id
                        }
                    }
                ),
                job
            )
            for i, job in enumerate(inputs)
        ]
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import json

from django.test import TestCase

from beagle.settings import ROOT_DIR
from beagle_etl.models import Operator
from runner.operator.operator_factory import OperatorFactory
from runner.operator.access.v1_0_0.sample_sheet import AccessSampleSheetOperator


# Fixture files, given relative to ROOT_DIR, that the test case below loads.
COMMON_FIXTURES = [
'fixtures/tests/10075_D_single_TN_pair.file.json',
'fixtures/tests/10075_D_single_TN_pair.filemetadata.json',
'runner/fixtures/runner.pipeline.json',
'runner/fixtures/runner.run.json',
'runner/fixtures/runner.operator_run.json',
'file_system/fixtures/file_system.filegroup.json',
'file_system/fixtures/file_system.filetype.json',
'file_system/fixtures/file_system.storage.json',
'beagle_etl/fixtures/beagle_etl.operator.json',
]


class TestSampleSheeetOperator(TestCase):
    """Checks the job inputs built by AccessSampleSheetOperator for request 10075_D."""

    fixtures = [os.path.join(ROOT_DIR, f) for f in COMMON_FIXTURES]

    @staticmethod
    def _expected_row(lane, sample_id, description, igo_id, library_volume):
        """
        Build one expected sample row.

        Only the arguments differ between rows of this request; every other
        field has the same value for all rows.

        :param lane: expected "Lane" value
        :param sample_id: expected "SampleID" value
        :param description: expected "Description" value
        :param igo_id: expected "IgoID", also embedded in "Operator"
        :param library_volume: expected "LibraryVolume" value (may be None)
        :return: dict with the full expected row
        """
        return {
            "Lane": lane,
            "SampleID": sample_id,
            "SampleRef": "Human",
            "Index": "GTATTGGC",
            "Description": description,
            "Control": "N",
            "Recipe": "IMPACT468",
            "Operator": "AR|-;-|%s|F|NOVASEQ" % igo_id,
            "SampleProject": "Project_10075_D",
            "DnaInputNg": 12.0,
            "CaptureInputNg": "110.0",
            "LibraryVolume": library_volume,
            "PatientID": "C-8VK0V7",
            "IgoID": igo_id,
        }

    def test_sample_sheet_operator(self):
        """The operator yields a valid serializer whose inputs hold one row per file and lane."""
        request_id = "10075_D"
        operator_model = Operator.objects.get(slug="AccessSampleSheetOperator")
        operator = OperatorFactory.get_by_model(operator_model, request_id=request_id)

        # Build the jobs once so the validated serializer is the same object
        # whose inputs are inspected below.
        serializer = operator.get_jobs()[0][0]
        self.assertTrue(serializer.is_valid())
        input_json = serializer.initial_data['inputs']

        row = self._expected_row
        expected_samples = [
            row(3, "C-0CREWW-L013-d", "Tumor", "10075_D_5", 35.0),
            row(4, "C-0CREWW-L013-d", "Tumor", "10075_D_5", 35.0),
            row(3, "C-0CREWW-L012-d", "Tumor", "10075_D_5", 35.0),
            row(4, "C-0CREWW-L012-d", "Tumor", "10075_D_5", 35.0),
            row(1, "C-0CREWW-L011-d", "Normal", "10075_D_3", None),
            row(2, "C-0CREWW-L011-d", "Normal", "10075_D_3", None),
            row(3, "C-0CREWW-L011-d", "Normal", "10075_D_3", None),
            row(1, "C-0CREWW-L010-d", "Normal", "10075_D_3", None),
            row(2, "C-0CREWW-L010-d", "Normal", "10075_D_3", None),
            row(3, "C-0CREWW-L010-d", "Normal", "10075_D_3", None),
        ]
        self.assertEqual(input_json, {"samples": expected_samples})