From 5eaa574ead906577ab618704388e6c6bea7ea32e Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Wed, 19 May 2021 16:59:17 -0400 Subject: [PATCH 01/14] operator to generate inputs for sample sheets --- beagle_etl/fixtures/beagle_etl.operator.json | 11 +++++++++++ runner/fixtures/runner.pipeline.json | 17 +++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/beagle_etl/fixtures/beagle_etl.operator.json b/beagle_etl/fixtures/beagle_etl.operator.json index 0e57c1b86..e4c1b46ce 100644 --- a/beagle_etl/fixtures/beagle_etl.operator.json +++ b/beagle_etl/fixtures/beagle_etl.operator.json @@ -97,5 +97,16 @@ "version": "v1.0.0", "slug": "AccessLegacyCNVOperator" } + }, + { + "model": "beagle_etl.operator", + "pk": 10, + "fields": { + "active": true, + "recipes": "[\"None\"]", + "class_name": "runner.operator.access.v1_0_0.sample_sheet.AccessSampleSheetOperator", + "version": "v1.0.0", + "slug": "AccessSampleSheetOperator" + } } ] diff --git a/runner/fixtures/runner.pipeline.json b/runner/fixtures/runner.pipeline.json index 74850cb70..9f325d1bf 100644 --- a/runner/fixtures/runner.pipeline.json +++ b/runner/fixtures/runner.pipeline.json @@ -171,5 +171,22 @@ "operator": 9, "default": true } + }, + { + "model": "runner.pipeline", + "pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbffb", + "fields": { + "created_date": "2019-11-18T17:46:45.118Z", + "modified_date": "2019-12-05T01:12:39.854Z", + "name": "sample sheet", + "github": "git@github.com:mskcc/ACCESS-Pipeline", + "version": "ij/output_bam_files_instead_of_directory", + "entrypoint": "cwl_tools/sample_sheet/sample_sheet.cwl", + "output_file_group": "a975f490-1b02-4575-abae-a4f8e3667733", + "output_directory": "/work/access/production/runs/voyager/sample_sheets", + "operator": 10, + "default": true + } } + ] From 664bea3ffe3e560bde9b6989df8bf4d3021704f9 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Wed, 19 May 2021 17:00:15 -0400 Subject: [PATCH 02/14] operator to generate inputs for sample sheets --- .../access/v1_0_0/sample_sheet/__init__.py | 66 +++++++++++ .../operator/access/sample_sheet/__init__.py | 0 .../test_sample_sheet_operator.py | 111 ++++++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 runner/operator/access/v1_0_0/sample_sheet/__init__.py create mode 100644 runner/tests/operator/access/sample_sheet/__init__.py create mode 100644 runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py new file mode 100644 index 000000000..5ef186a08 --- /dev/null +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -0,0 +1,66 @@ +import os +import json +import logging +from jinja2 import Template + +from file_system.repository.file_repository import FileRepository +from runner.operator.operator import Operator +from runner.operator.access import get_request_id, get_request_id_runs, extract_tumor_ports +from runner.models import Port, RunStatus +from runner.serializers import APIRunCreateSerializer + + +logger = logging.getLogger(__name__) + +class AccessSampleSheetOperator(Operator): + def get_jobs(self): + """ + Convert job inputs into serialized jobs + + :return: list[(serialized job info, Job)] + """ + files = FileRepository.filter(queryset=self.files, + metadata={'requestId': self.request_id, + 'igocomplete': True}) + + samples = [] + for f in files: + if f.metadata["barcodeIndex"]: + barcodeIndex = f.metadata["barcodeIndex"].split("-") + index1 = barcodeIndex[0] + if len(barcodeIndex) > 1: + index2 = barcodeIndex[1] + + for lane in f.metadata["flowCellLanes"]: + samples.append({ + "lane": lane, + "sample_id": f.metadata["sampleId"], + "sample_plate": f.metadata["tumorOrNormal"], + "sample_well": f.metadata["recipe"], + "17_index_id": f.metadata["barcodeId"], + "index": index1, + "index2": index2, + "sample_project": "Project_" + self.request_id, + "description": f.metadata["dataAnalystName"] + }) + + inputs = [{ + "samples": samples + }] + + return [ + ( + APIRunCreateSerializer( + data={ + 'name': "Sample Sheet: %s, %i of %i" % (self.request_id, i + 1, len(inputs)), + 'app': self.get_pipeline_id(), + 'inputs': job, + 'tags': { + 'requestId': self.request_id + } + } + ), + job + ) + for i, job in enumerate(inputs) + ] diff --git a/runner/tests/operator/access/sample_sheet/__init__.py b/runner/tests/operator/access/sample_sheet/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py new file mode 100644 index 000000000..ea60ca5c6 --- /dev/null +++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py @@ -0,0 +1,111 @@ +import os +import json + +from django.test import TestCase + +from beagle.settings import ROOT_DIR +from beagle_etl.models import Operator +from runner.operator.operator_factory import OperatorFactory +from runner.operator.access.v1_0_0.sample_sheet import AccessSampleSheetOperator + + +COMMON_FIXTURES = [ + 'fixtures/tests/10075_D_single_TN_pair.file.json', + 'fixtures/tests/10075_D_single_TN_pair.filemetadata.json', + 'runner/fixtures/runner.pipeline.json', + 'runner/fixtures/runner.run.json', + 'runner/fixtures/runner.operator_run.json', + 'file_system/fixtures/file_system.filegroup.json', + 'file_system/fixtures/file_system.filetype.json', + 'file_system/fixtures/file_system.storage.json', + 'beagle_etl/fixtures/beagle_etl.operator.json', +] + + +class TestSampleSheeetOperator(TestCase): + + fixtures = [os.path.join(ROOT_DIR, f) for f in COMMON_FIXTURES] + + def test_sample_sheet_operator(self): + """ + Test that an Access legacy SV operator instance can be created and validated + """ + request_id = "10075_D" + + operator_model = Operator.objects.get(slug="AccessSampleSheetOperator") + operator = OperatorFactory.get_by_model(operator_model, request_id=request_id) + self.assertEqual(operator.get_jobs()[0][0].is_valid(), True) + input_json = operator.get_jobs()[0][0].initial_data['inputs'] + print(json.dumps(input_json)) + self.assertEqual(input_json, {"samples": [{"lane": 3, "sample_id": "10075_D_5", + "sample_plate": "Tumor", "sample_well": + "IMPACT468", "17_index_id": + "DUAL_IDT_LIB_267", "index": "GTATTGGC", + "index2": "TTGTCGGT", "sample_project": + "Project_10075_D", "description": ""}, + {"lane": 4, "sample_id": "10075_D_5", + "sample_plate": "Tumor", "sample_well": + "IMPACT468", "17_index_id": + "DUAL_IDT_LIB_267", "index": "GTATTGGC", + "index2": "TTGTCGGT", "sample_project": + "Project_10075_D", "description": ""}, + {"lane": 3, "sample_id": "10075_D_5", + "sample_plate": "Tumor", "sample_well": + "IMPACT468", "17_index_id": + "DUAL_IDT_LIB_267", "index": "GTATTGGC", + "index2": "TTGTCGGT", "sample_project": + "Project_10075_D", "description": ""}, + {"lane": 4, "sample_id": "10075_D_5", + "sample_plate": "Tumor", "sample_well": + "IMPACT468", "17_index_id": + "DUAL_IDT_LIB_267", "index": "GTATTGGC", + "index2": "TTGTCGGT", "sample_project": + "Project_10075_D", "description": ""}, + {"lane": 1, "sample_id": "10075_D_3", + "sample_plate": "Normal", "sample_well": + "IMPACT468", "17_index_id": None, "index": + "GTATTGGC", "index2": "TTGTCGGT", + "sample_project": "Project_10075_D", + "description": ""}, {"lane": 2, "sample_id": + "10075_D_3", + "sample_plate": + "Normal", "sample_well": + "IMPACT468", + "17_index_id": None, + "index": "GTATTGGC", + "index2": "TTGTCGGT", + "sample_project": + "Project_10075_D", + "description": ""}, + {"lane": 3, "sample_id": "10075_D_3", + "sample_plate": "Normal", "sample_well": + "IMPACT468", "17_index_id": None, "index": + "GTATTGGC", "index2": "TTGTCGGT", + "sample_project": "Project_10075_D", + "description": ""}, {"lane": 1, "sample_id": + "10075_D_3", + "sample_plate": + "Normal", "sample_well": + "IMPACT468", + "17_index_id": None, + "index": "GTATTGGC", + "index2": "TTGTCGGT", + "sample_project": + "Project_10075_D", + "description": ""}, + {"lane": 2, "sample_id": "10075_D_3", + "sample_plate": "Normal", "sample_well": + "IMPACT468", "17_index_id": None, "index": + "GTATTGGC", "index2": "TTGTCGGT", + "sample_project": "Project_10075_D", + "description": ""}, {"lane": 3, "sample_id": + "10075_D_3", + "sample_plate": + "Normal", "sample_well": + "IMPACT468", + "17_index_id": None, + "index": "GTATTGGC", + "index2": "TTGTCGGT", + "sample_project": + "Project_10075_D", + "description": ""}]}) From 371f82b48078cdee1679fcbe7eae21d0444266cc Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Wed, 19 May 2021 17:01:34 -0400 Subject: [PATCH 03/14] clean-up --- runner/operator/access/v1_0_0/sample_sheet/__init__.py | 5 ----- .../access/sample_sheet/test_sample_sheet_operator.py | 4 ---- 2 files changed, 9 deletions(-) diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py index 5ef186a08..a88932e43 100644 --- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -1,15 +1,10 @@ import os -import json import logging -from jinja2 import Template from file_system.repository.file_repository import FileRepository from runner.operator.operator import Operator -from runner.operator.access import get_request_id, get_request_id_runs, extract_tumor_ports -from runner.models import Port, RunStatus from runner.serializers import APIRunCreateSerializer - logger = logging.getLogger(__name__) class AccessSampleSheetOperator(Operator): diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py index ea60ca5c6..80d77a7d5 100644 --- a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py +++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py @@ -27,11 +27,7 @@ class TestSampleSheeetOperator(TestCase): fixtures = [os.path.join(ROOT_DIR, f) for f in COMMON_FIXTURES] def test_sample_sheet_operator(self): - """ - Test that an Access legacy SV operator instance can be created and validated - """ request_id = "10075_D" - operator_model = Operator.objects.get(slug="AccessSampleSheetOperator") operator = OperatorFactory.get_by_model(operator_model, request_id=request_id) self.assertEqual(operator.get_jobs()[0][0].is_valid(), True) From fb0901a6f6f7d3d8e692c1f88088d47535136a01 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 21 May 2021 12:44:46 -0400 Subject: [PATCH 04/14] update sample sheets --- fixtures/tests/10075_D.filemetadata.json | 10 +++ .../10075_D_single_TN_pair.filemetadata.json | 8 ++ .../access/v1_0_0/sample_sheet/__init__.py | 25 ++++--- .../test_sample_sheet_operator.py | 75 +------------------ 4 files changed, 34 insertions(+), 84 deletions(-) diff --git a/fixtures/tests/10075_D.filemetadata.json b/fixtures/tests/10075_D.filemetadata.json index 4f9e4012d..4dd09d3e8 100644 --- a/fixtures/tests/10075_D.filemetadata.json +++ b/fixtures/tests/10075_D.filemetadata.json @@ -18,6 +18,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_5", + "cmoSampleName": "s_C_0CREWW_L013_d", "barcodeId": "DUAL_IDT_LIB_267", "libraryId": "10075_D_5_1_1_1", "patientId": "C-8VK0V7", @@ -93,6 +94,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_4", + "cmoSampleName": "s_C_0CREWW_L012_d", "barcodeId": "DUAL_IDT_LIB_255", "libraryId": "10075_D_4_1_1_1", "patientId": "C-DRKHP7", @@ -168,6 +170,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_2", + "cmoSampleName": "s_C_0CREWW_L011_d", "barcodeId": null, "libraryId": "10075_D_2", "patientId": "C-DRKHP7", @@ -244,6 +247,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_3", + "cmoSampleName": "s_C_0CREWW_L010_d", "barcodeId": null, "libraryId": "10075_D_3", "patientId": "C-8VK0V7", @@ -320,6 +324,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_4", + "cmoSampleName": "s_C_0CREWW_L010_d", "barcodeId": "DUAL_IDT_LIB_255", "libraryId": null, "patientId": "C-DRKHP7", @@ -395,6 +400,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_2", + "cmoSampleName": "s_C_0CREWW_L009_d", "barcodeId": null, "libraryId": null, "patientId": "C-DRKHP7", @@ -471,6 +477,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_3", + "cmoSampleName": "s_C_0CREWW_L008_d", "barcodeId": null, "libraryId": null, "patientId": "C-8VK0V7", @@ -547,6 +554,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_5", + "cmoSampleName": "s_C_0CREWW_L007_d", "barcodeId": "DUAL_IDT_LIB_267", "libraryId": null, "patientId": "C-8VK0V7", @@ -622,6 +630,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_1", + "cmoSampleName": "s_C_0CREWW_L006_d", "barcodeId": "DUAL_IDT_LIB_243", "libraryId": "10075_D_1_1_1_1", "patientId": "C-DRKHP7", @@ -697,6 +706,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_1", + "cmoSampleName": "s_C_0CREWW_L005_d", "barcodeId": "DUAL_IDT_LIB_243", "libraryId": null, "patientId": "C-DRKHP7", diff --git a/fixtures/tests/10075_D_single_TN_pair.filemetadata.json b/fixtures/tests/10075_D_single_TN_pair.filemetadata.json index a225f294a..878e30ce9 100644 --- a/fixtures/tests/10075_D_single_TN_pair.filemetadata.json +++ b/fixtures/tests/10075_D_single_TN_pair.filemetadata.json @@ -19,6 +19,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_5", + "cmoSampleName": "s_C_0CREWW_L013_d", "barcodeId": "DUAL_IDT_LIB_267", "libraryId": "10075_D_5_1_1_1", "patientId": "C-8VK0V7", @@ -31,6 +32,7 @@ "captureName": "Pool-09483_R-10075_D-Tube7_1", "igocomplete": true, "labHeadName": "John Smith", + "dnaInputNg": 12.0, "barcodeIndex": "GTATTGGC-TTGTCGGT", "labHeadEmail": "email@internet.com", "oncoTreeCode": "MEL", @@ -95,6 +97,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_5", + "cmoSampleName": "s_C_0CREWW_L012_d", "barcodeId": "DUAL_IDT_LIB_267", "libraryId": null, "patientId": "C-8VK0V7", @@ -107,6 +110,7 @@ "captureName": "Pool-09483_R-10075_D-Tube7_1", "igocomplete": true, "labHeadName": "John Smith", + "dnaInputNg": 12.0, "barcodeIndex": "GTATTGGC-TTGTCGGT", "labHeadEmail": "email@internet.com", "oncoTreeCode": "MEL", @@ -171,6 +175,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_3", + "cmoSampleName": "s_C_0CREWW_L011_d", "barcodeId": null, "libraryId": "10075_D_3", "patientId": "C-8VK0V7", @@ -183,6 +188,7 @@ "captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1", "igocomplete": true, "labHeadName": "John Smith", + "dnaInputNg": 12.0, "barcodeIndex": null, "labHeadEmail": "email@internet.com", "oncoTreeCode": null, @@ -248,6 +254,7 @@ "runMode": "HiSeq High Output", "species": "Human", "sampleId": "10075_D_3", + "cmoSampleName": "s_C_0CREWW_L010_d", "barcodeId": null, "libraryId": null, "patientId": "C-8VK0V7", @@ -260,6 +267,7 @@ "captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1", "igocomplete": true, "labHeadName": "John Smith", + "dnaInputNg": 12.0, "barcodeIndex": null, "labHeadEmail": "email@internet.com", "oncoTreeCode": null, diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py index a88932e43..ee943de92 100644 --- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -23,20 +23,23 @@ def get_jobs(self): if f.metadata["barcodeIndex"]: barcodeIndex = f.metadata["barcodeIndex"].split("-") index1 = barcodeIndex[0] - if len(barcodeIndex) > 1: - index2 = barcodeIndex[1] for lane in f.metadata["flowCellLanes"]: samples.append({ - "lane": lane, - "sample_id": f.metadata["sampleId"], - "sample_plate": f.metadata["tumorOrNormal"], - "sample_well": f.metadata["recipe"], - "17_index_id": f.metadata["barcodeId"], - "index": index1, - "index2": index2, - "sample_project": "Project_" + self.request_id, - "description": f.metadata["dataAnalystName"] + "Lane": lane, + "SampleID": f.metadata["cmoSampleName"][2:].replace("_", "-"), + "SampleRef": f.metadata["species"], + "Index": index1, + "Description": f.metadata["tumorOrNormal"], + "Control": "N", + "Recipe": f.metadata["recipe"], + "Operator": "|".join(["AR", "-;-", f.metadata["sampleId"], f.metadata["sex"], "NOVASEQ"]), + "SampleProject": "Project_" + self.request_id, + "DnaInputNg": f.metadata["dnaInputNg"], + "CaptureInputNg": f.metadata["captureInputNg"], + "LibraryVolume": f.metadata["libraryVolume"], + "PatientId": f.metadata["patientId"], + "IgoID": f.metadata["sampleId"], }) inputs = [{ diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py index 80d77a7d5..f5f3de994 100644 --- a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py +++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py @@ -32,76 +32,5 @@ def test_sample_sheet_operator(self): operator = OperatorFactory.get_by_model(operator_model, request_id=request_id) self.assertEqual(operator.get_jobs()[0][0].is_valid(), True) input_json = operator.get_jobs()[0][0].initial_data['inputs'] - print(json.dumps(input_json)) - self.assertEqual(input_json, {"samples": [{"lane": 3, "sample_id": "10075_D_5", - "sample_plate": "Tumor", "sample_well": - "IMPACT468", "17_index_id": - "DUAL_IDT_LIB_267", "index": "GTATTGGC", - "index2": "TTGTCGGT", "sample_project": - "Project_10075_D", "description": ""}, - {"lane": 4, "sample_id": "10075_D_5", - "sample_plate": "Tumor", "sample_well": - "IMPACT468", "17_index_id": - "DUAL_IDT_LIB_267", "index": "GTATTGGC", - "index2": "TTGTCGGT", "sample_project": - "Project_10075_D", "description": ""}, - {"lane": 3, "sample_id": "10075_D_5", - "sample_plate": "Tumor", "sample_well": - "IMPACT468", "17_index_id": - "DUAL_IDT_LIB_267", "index": "GTATTGGC", - "index2": "TTGTCGGT", "sample_project": - "Project_10075_D", "description": ""}, - {"lane": 4, "sample_id": "10075_D_5", - "sample_plate": "Tumor", "sample_well": - "IMPACT468", "17_index_id": - "DUAL_IDT_LIB_267", "index": "GTATTGGC", - "index2": "TTGTCGGT", "sample_project": - "Project_10075_D", "description": ""}, - {"lane": 1, "sample_id": "10075_D_3", - "sample_plate": "Normal", "sample_well": - "IMPACT468", "17_index_id": None, "index": - "GTATTGGC", "index2": "TTGTCGGT", - "sample_project": "Project_10075_D", - "description": ""}, {"lane": 2, "sample_id": - "10075_D_3", - "sample_plate": - "Normal", "sample_well": - "IMPACT468", - "17_index_id": None, - "index": "GTATTGGC", - "index2": "TTGTCGGT", - "sample_project": - "Project_10075_D", - "description": ""}, - {"lane": 3, "sample_id": "10075_D_3", - "sample_plate": "Normal", "sample_well": - "IMPACT468", "17_index_id": None, "index": - "GTATTGGC", "index2": "TTGTCGGT", - "sample_project": "Project_10075_D", - "description": ""}, {"lane": 1, "sample_id": - "10075_D_3", - "sample_plate": - "Normal", "sample_well": - "IMPACT468", - "17_index_id": None, - "index": "GTATTGGC", - "index2": "TTGTCGGT", - "sample_project": - "Project_10075_D", - "description": ""}, - {"lane": 2, "sample_id": "10075_D_3", - "sample_plate": "Normal", "sample_well": - "IMPACT468", "17_index_id": None, "index": - "GTATTGGC", "index2": "TTGTCGGT", - "sample_project": "Project_10075_D", - "description": ""}, {"lane": 3, "sample_id": - "10075_D_3", - "sample_plate": - "Normal", "sample_well": - "IMPACT468", - "17_index_id": None, - "index": "GTATTGGC", - "index2": "TTGTCGGT", - "sample_project": - "Project_10075_D", - "description": ""}]}) + #print(json.dumps(input_json)) + self.assertEqual(input_json, {"samples": [{"Lane": 3, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 3, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 1, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 1, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}]}) From 1a472956e8cf0d35e7d48d9cb231e7194d3555e3 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 21 May 2021 15:09:54 -0400 Subject: [PATCH 05/14] change Id to ID --- runner/operator/access/v1_0_0/sample_sheet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py index ee943de92..c719847f4 100644 --- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -38,7 +38,7 @@ def get_jobs(self): "DnaInputNg": f.metadata["dnaInputNg"], "CaptureInputNg": f.metadata["captureInputNg"], "LibraryVolume": f.metadata["libraryVolume"], - "PatientId": f.metadata["patientId"], + "PatientID": f.metadata["patientId"], "IgoID": f.metadata["sampleId"], }) From 59b8e3d9793f46944707faef9cf3fbcd44b8e0a0 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 21 May 2021 15:10:25 -0400 Subject: [PATCH 06/14] update tests --- .../operator/access/sample_sheet/test_sample_sheet_operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py index f5f3de994..d96bc0f5a 100644 --- a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py +++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py @@ -33,4 +33,4 @@ def test_sample_sheet_operator(self): self.assertEqual(operator.get_jobs()[0][0].is_valid(), True) input_json = operator.get_jobs()[0][0].initial_data['inputs'] #print(json.dumps(input_json)) - self.assertEqual(input_json, {"samples": [{"Lane": 3, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 3, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 1, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 1, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}]}) + self.assertEqual(input_json, {"samples": [{"Lane": 3, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 3, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 1, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 1, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}]}) From 2bbc4581ed207015526929b970e8e392294693b5 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 21 May 2021 15:44:17 -0400 Subject: [PATCH 07/14] update sample name --- runner/operator/access/v1_0_0/sample_sheet/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py index c719847f4..e8cef87f7 100644 --- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -27,7 +27,7 @@ def get_jobs(self): for lane in f.metadata["flowCellLanes"]: samples.append({ "Lane": lane, - "SampleID": f.metadata["cmoSampleName"][2:].replace("_", "-"), + "SampleID": f.metadata["sampleName"], "SampleRef": f.metadata["species"], "Index": index1, "Description": f.metadata["tumorOrNormal"], From 6d2f3c01f606944d3b65a3022d901a333ca7c258 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Thu, 26 Aug 2021 12:44:47 -0400 Subject: [PATCH 08/14] add request id to metadata for output --- runner/operator/access/v1_0_0/sample_sheet/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py index e8cef87f7..d3e1fc84c 100644 --- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -53,6 +53,9 @@ def get_jobs(self): 'name': "Sample Sheet: %s, %i of %i" % (self.request_id, i + 1, len(inputs)), 'app': self.get_pipeline_id(), 'inputs': job, + 'output_metadata': { + 'requestId': self.request_id + }, 'tags': { 'requestId': self.request_id } From 78d74689f5b3c0319a92d6ff032472f2ea6c29b1 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Thu, 26 Aug 2021 15:23:01 -0400 Subject: [PATCH 09/14] add mpath submitter --- beagle/settings.py | 2 + .../access/v1_0_0/mpath_submitter/__init__.py | 91 +++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 runner/operator/access/v1_0_0/mpath_submitter/__init__.py diff --git a/beagle/settings.py b/beagle/settings.py index b5d326ccf..7fdcf737b 100644 --- a/beagle/settings.py +++ b/beagle/settings.py @@ -322,6 +322,8 @@ RIDGEBACK_URL = os.environ.get('BEAGLE_RIDGEBACK_URL', 'http://localhost:5003') +MPATH_URL = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331') + LOG_PATH = os.environ.get('BEAGLE_LOG_PATH', 'beagle-server.log') LOGGING = { diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py new file mode 100644 index 000000000..f0682d768 --- /dev/null +++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py @@ -0,0 +1,91 @@ +import requests + +from beagle.settings import MPATH_URL +from runner.operator.operator import Operator +from runner.models import PortType, Run, RunStatus, File + +WORKFLOW_NAME_TO_MPATH_TYPE = { + "access legacy MSI": "microsatellite_instability", + "access legacy SNV": "snp_indel_variants", + "access legacy SV": "structural_variants", + "access legacy CNV": "copy_number_variants", +} + +WORKFLOW_NAME_TO_MPATH_LOCATION_KEY = { + "access legacy MSI": "msi_fs_location", + "access legacy SNV": "snp_fs_location", + "access legacy SV": "sv_fs_location", + "access legacy CNV": "cnv_fs_location", +} + + + +def get_sample_sheet(job_group_id): + sample_sheet_run = Run.objects.filter( + job_group_id=job_group_id, + run__app__name="sample sheet", + run__status=RunStatus.COMPLETED + ).order_by('-created_date').first() + + sample_sheet = File.objects.filter( + port__run=sample_sheet_run, + port__port_type=PortType.OUTPUT + ).first() + + return sample_sheet.path + + +""" +For additional information on how Voyager results are mounted on MPath server see: +https://app.gitbook.com/@mskcc-1/s/voyager/mpath/commands +""" +def juno_path_to_mpath(path): + [_, p] = path.split("/voyager") + return "/voyager" + p + + +def submit_to_mpath(workflow_name, files, sample_sheet_path): + mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name] + location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name] + + data = { + "dmp_alys_task_name": "Project_", + "ss_location": [ + juno_path_to_mpath(sample_sheet_path) + ], + "options": [ + mpath_type + ], + } + data[location_key] = [juno_path_to_mpath(f.path) for f in files] + + payload = { + "data": [data] + } + + requests.post(MPATH_URL + "/ngs/", json=payload) + + +def get_files(runs): + File.objects.filter( + port__run__in=runs, + port__run__status=RunStatus.COMPLETED, + port__port_type=PortType.OUTPUT + ).all() + + +class AccessMPathSubmitter(Operator): + def get_jobs(self): + runs = Run.objects.filter(id__in=self.run_ids) + meta_run = runs[0] + + pipeline_name = meta_run.pipeline.name + job_group_id = meta_run.job_group_id + + sample_sheet_path = get_sample_sheet(job_group_id) + + files = get_files(runs) + + submit_to_mpath(pipeline_name, files, sample_sheet_path) + + return [] From 86eeca7ab52b140f05c498a8ebf05901b35b988c Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Thu, 26 Aug 2021 22:37:12 -0400 Subject: [PATCH 10/14] some tweaks --- .../access/v1_0_0/mpath_submitter/__init__.py | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py index f0682d768..9dff75577 100644 --- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py +++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py @@ -1,3 +1,10 @@ +""" +" This operator submits each downstream workflow to MPath +" An operator trigger, for each downstream workflow (MSI/CNV/SV/SNV), +" _when all runs are complete_, should be created. +" For additional information on the API and how MPath ACCESS server +" is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/ +""" import requests from beagle.settings import MPATH_URL @@ -23,8 +30,8 @@ def get_sample_sheet(job_group_id): sample_sheet_run = Run.objects.filter( job_group_id=job_group_id, - run__app__name="sample sheet", - run__status=RunStatus.COMPLETED + app__name="sample sheet", + status=RunStatus.COMPLETED ).order_by('-created_date').first() sample_sheet = File.objects.filter( @@ -35,21 +42,17 @@ def get_sample_sheet(job_group_id): return sample_sheet.path -""" -For additional information on how Voyager results are mounted on MPath server see: -https://app.gitbook.com/@mskcc-1/s/voyager/mpath/commands -""" def juno_path_to_mpath(path): [_, p] = path.split("/voyager") return "/voyager" + p -def submit_to_mpath(workflow_name, files, sample_sheet_path): +def submit_to_mpath(request_id, workflow_name, files, sample_sheet_path): mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name] location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name] data = { - "dmp_alys_task_name": "Project_", + "dmp_alys_task_name": "Project_" + request_id, "ss_location": [ juno_path_to_mpath(sample_sheet_path) ], @@ -67,7 +70,7 @@ def submit_to_mpath(workflow_name, files, sample_sheet_path): def get_files(runs): - File.objects.filter( + return File.objects.filter( port__run__in=runs, port__run__status=RunStatus.COMPLETED, port__port_type=PortType.OUTPUT @@ -79,13 +82,14 @@ def get_jobs(self): runs = Run.objects.filter(id__in=self.run_ids) meta_run = runs[0] - pipeline_name = meta_run.pipeline.name + request_id = meta_run.metadata["requestId"] + pipeline_name = meta_run.app.name job_group_id = meta_run.job_group_id sample_sheet_path = get_sample_sheet(job_group_id) files = get_files(runs) - submit_to_mpath(pipeline_name, files, sample_sheet_path) + submit_to_mpath(request_id, pipeline_name, files, sample_sheet_path) return [] From ab1e7a6df4c81c6c9114cdbcd7f63a33009e6f29 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 27 Aug 2021 09:58:35 -0400 Subject: [PATCH 11/14] updates to mpath submitter --- .../access/v1_0_0/mpath_submitter/__init__.py | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py index 9dff75577..066a5a6f2 100644 --- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py +++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py @@ -12,7 +12,7 @@ from runner.models import PortType, Run, RunStatus, File WORKFLOW_NAME_TO_MPATH_TYPE = { - "access legacy MSI": "microsatellite_instability", + "access legacy MSI": "admie_microsatellite_instability", "access legacy SNV": "snp_indel_variants", "access legacy SV": "structural_variants", "access legacy CNV": "copy_number_variants", @@ -47,7 +47,31 @@ def juno_path_to_mpath(path): return "/voyager" + p -def submit_to_mpath(request_id, workflow_name, files, sample_sheet_path): +# This will return 400 bad request if the project already exists. +def submit_project(request_id): + payload = { + "data": [ + { + "comments": "", + "dmp_alys_task_name": "Project_" + request_id, + "dmp_alys_task_type_cv_id": 7, + # TODO + "analyst_cv_id": None, + "dmp_dms_at_id": None, + "dmp_dms_id": None, + "dmp_lims_id": None, + "fellow_cv_id": None, + "fs_location": "N/A", + "is_clinical": 0, + "pathologist_cv_id": None + } + ] + } + + requests.post(MPATH_URL + "/ngs/projects", json=payload) + + +def submit_workflow(request_id, workflow_name, files, sample_sheet_path): mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name] location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name] @@ -56,8 +80,12 @@ def submit_to_mpath(request_id, workflow_name, files, sample_sheet_path): "ss_location": [ juno_path_to_mpath(sample_sheet_path) ], + # This shouldn't be required. We can't use READONLY dirs so pointing to + # /voyager does not work. Talk with Anoop on what should be done. + "fs_location": "/srv", "options": [ - mpath_type + mpath_type, + "samples" ], } data[location_key] = [juno_path_to_mpath(f.path) for f in files] @@ -90,6 +118,7 @@ def get_jobs(self): files = get_files(runs) - submit_to_mpath(request_id, pipeline_name, files, sample_sheet_path) + submit_project(request_id) + submit_workflow(request_id, pipeline_name, files, sample_sheet_path) return [] From a29007072c6371b955525f68ad560f69eeebe23b Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 27 Aug 2021 10:11:50 -0400 Subject: [PATCH 12/14] updates to mpath submitter --- .../access/v1_0_0/mpath_submitter/__init__.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py index 066a5a6f2..27d764159 100644 --- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py +++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py @@ -27,11 +27,21 @@ -def get_sample_sheet(job_group_id): +def get_sample_sheet(request_id, job_group_id): sample_sheet_run = Run.objects.filter( - job_group_id=job_group_id, app__name="sample sheet", - status=RunStatus.COMPLETED + status=RunStatus.COMPLETED, + + # Using job_group_id is better but in order to trigger MPath Submitter + # for the massive backlog where Sample Sheet was generated in a different + # job_group we have to use request ID + # To trigger this for the backlog see: + # https://app.gitbook.com/@mskcc-1/s/voyager/debugging/using-the-django-shell + # Once the backlog is submitted this should be reverted + + # job_group_id=job_group_id, + + tags__requestId=request_id ).order_by('-created_date').first() sample_sheet = File.objects.filter( @@ -114,7 +124,7 @@ def get_jobs(self): pipeline_name = meta_run.app.name job_group_id = meta_run.job_group_id - sample_sheet_path = get_sample_sheet(job_group_id) + sample_sheet_path = get_sample_sheet(request_id, job_group_id) files = get_files(runs) From f214ce62f513be28e3314b632b7141c3f7b070b3 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 27 Aug 2021 12:25:44 -0400 Subject: [PATCH 13/14] rename project name so MPath can pick it up --- runner/operator/access/v1_0_0/mpath_submitter/__init__.py | 4 ++-- runner/operator/access/v1_0_0/sample_sheet/__init__.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py index 27d764159..3fe02242d 100644 --- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py +++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py @@ -63,7 +63,7 @@ def submit_project(request_id): "data": [ { "comments": "", - "dmp_alys_task_name": "Project_" + request_id, + "dmp_alys_task_name": "ACCESSv1-" + request_id, "dmp_alys_task_type_cv_id": 7, # TODO "analyst_cv_id": None, @@ -86,7 +86,7 @@ def submit_workflow(request_id, workflow_name, files, sample_sheet_path): location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name] data = { - "dmp_alys_task_name": "Project_" + request_id, + "dmp_alys_task_name": "ACCESSv1-" + request_id, "ss_location": [ juno_path_to_mpath(sample_sheet_path) ], diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py index d3e1fc84c..25683e6ec 100644 --- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py +++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py @@ -34,7 +34,7 @@ def get_jobs(self): "Control": "N", "Recipe": f.metadata["recipe"], "Operator": "|".join(["AR", "-;-", f.metadata["sampleId"], f.metadata["sex"], "NOVASEQ"]), - "SampleProject": "Project_" + self.request_id, + "SampleProject": "ACCESSv1-" + self.request_id, "DnaInputNg": f.metadata["dnaInputNg"], "CaptureInputNg": f.metadata["captureInputNg"], "LibraryVolume": f.metadata["libraryVolume"], From cdef142ae0fa18e3a4590d7c4991e4d9d89d2db4 Mon Sep 17 00:00:00 2001 From: Adrian Fraiha Date: Fri, 27 Aug 2021 13:00:50 -0400 Subject: [PATCH 14/14] updates --- .../access/v1_0_0/mpath_submitter/__init__.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py index 3fe02242d..a3efcebd4 100644 --- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py +++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py @@ -1,6 +1,6 @@ """ -" This operator submits each downstream workflow to MPath -" An operator trigger, for each downstream workflow (MSI/CNV/SV/SNV), +" This operator submits each downstream pipeline to MPath +" An operator trigger, for each downstream pipeline (MSI/CNV/SV/SNV), " _when all runs are complete_, should be created. " For additional information on the API and how MPath ACCESS server " is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/ @@ -11,20 +11,26 @@ from runner.operator.operator import Operator from runner.models import PortType, Run, RunStatus, File -WORKFLOW_NAME_TO_MPATH_TYPE = { +PIPELINE_NAME_TO_MPATH_TYPE = { "access legacy MSI": "admie_microsatellite_instability", "access legacy SNV": "snp_indel_variants", "access legacy SV": "structural_variants", "access legacy CNV": "copy_number_variants", } -WORKFLOW_NAME_TO_MPATH_LOCATION_KEY = { +PIPELINE_NAME_TO_MPATH_LOCATION_KEY = { "access legacy MSI": "msi_fs_location", "access legacy SNV": "snp_fs_location", "access legacy SV": "sv_fs_location", "access legacy CNV": "cnv_fs_location", } +PIPELINE_NAME_TO_FILES = { + "access legacy MSI": ["msi_results.txt"], + "access legacy SNV": [], #TODO + "access legacy SV": [], + "access legacy CNV": [] +} def get_sample_sheet(request_id, job_group_id): @@ -81,9 +87,9 @@ def submit_project(request_id): requests.post(MPATH_URL + "/ngs/projects", json=payload) -def submit_workflow(request_id, workflow_name, files, sample_sheet_path): - mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name] - location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name] +def submit_pipeline(request_id, pipeline_name, files, sample_sheet_path): + mpath_type = PIPELINE_NAME_TO_MPATH_TYPE[pipeline_name] + location_key = PIPELINE_NAME_TO_MPATH_LOCATION_KEY[pipeline_name] data = { "dmp_alys_task_name": "ACCESSv1-" + request_id, @@ -107,11 +113,12 @@ def submit_workflow(request_id, workflow_name, files, sample_sheet_path): requests.post(MPATH_URL + "/ngs/", json=payload) -def get_files(runs): +def get_files(pipeline_name, runs): return File.objects.filter( port__run__in=runs, port__run__status=RunStatus.COMPLETED, - port__port_type=PortType.OUTPUT + port__port_type=PortType.OUTPUT, + file_name__in=PIPELINE_NAME_TO_FILES[pipeline_name] ).all() @@ -126,9 +133,9 @@ def get_jobs(self): sample_sheet_path = get_sample_sheet(request_id, job_group_id) - files = get_files(runs) + files = get_files(pipeline_name, runs) submit_project(request_id) - submit_workflow(request_id, pipeline_name, files, sample_sheet_path) + submit_pipeline(request_id, pipeline_name, files, sample_sheet_path) return []