From 5eaa574ead906577ab618704388e6c6bea7ea32e Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Wed, 19 May 2021 16:59:17 -0400
Subject: [PATCH 01/14] operator to generate inputs for sample sheets

---
 beagle_etl/fixtures/beagle_etl.operator.json | 11 +++++++++++
 runner/fixtures/runner.pipeline.json         | 17 +++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/beagle_etl/fixtures/beagle_etl.operator.json b/beagle_etl/fixtures/beagle_etl.operator.json
index 0e57c1b86..e4c1b46ce 100644
--- a/beagle_etl/fixtures/beagle_etl.operator.json
+++ b/beagle_etl/fixtures/beagle_etl.operator.json
@@ -97,5 +97,16 @@
             "version": "v1.0.0",
             "slug": "AccessLegacyCNVOperator"
         }
+    },
+    {
+        "model": "beagle_etl.operator",
+        "pk": 10,
+        "fields": {
+            "active": true,
+            "recipes": "[\"None\"]",
+            "class_name": "runner.operator.access.v1_0_0.sample_sheet.AccessSampleSheetOperator",
+            "version": "v1.0.0",
+            "slug": "AccessSampleSheetOperator"
+        }
     }
 ]
diff --git a/runner/fixtures/runner.pipeline.json b/runner/fixtures/runner.pipeline.json
index 74850cb70..9f325d1bf 100644
--- a/runner/fixtures/runner.pipeline.json
+++ b/runner/fixtures/runner.pipeline.json
@@ -171,5 +171,22 @@
             "operator": 9,
             "default": true
         }
+    },
+    {
+        "model": "runner.pipeline",
+        "pk": "65419097-a2b8-4d57-a8ab-c4c4cddcbffb",
+        "fields": {
+            "created_date": "2019-11-18T17:46:45.118Z",
+            "modified_date": "2019-12-05T01:12:39.854Z",
+            "name": "sample sheet",
+            "github": "git@github.com:mskcc/ACCESS-Pipeline",
+            "version": "ij/output_bam_files_instead_of_directory",
+            "entrypoint": "cwl_tools/sample_sheet/sample_sheet.cwl",
+            "output_file_group": "a975f490-1b02-4575-abae-a4f8e3667733",
+            "output_directory": "/work/access/production/runs/voyager/sample_sheets",
+            "operator": 10,
+            "default": true
+        }
     }
+
 ]

From 664bea3ffe3e560bde9b6989df8bf4d3021704f9 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Wed, 19 May 2021 17:00:15 -0400
Subject: [PATCH 02/14] operator to generate inputs for sample sheets

---
 .../access/v1_0_0/sample_sheet/__init__.py    |  66 +++++++++++
 .../operator/access/sample_sheet/__init__.py  |   0
 .../test_sample_sheet_operator.py             | 111 ++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100644 runner/operator/access/v1_0_0/sample_sheet/__init__.py
 create mode 100644 runner/tests/operator/access/sample_sheet/__init__.py
 create mode 100644 runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py

diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
new file mode 100644
index 000000000..5ef186a08
--- /dev/null
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -0,0 +1,66 @@
+import os
+import json
+import logging
+from jinja2 import Template
+
+from file_system.repository.file_repository import FileRepository
+from runner.operator.operator import Operator
+from runner.operator.access import get_request_id, get_request_id_runs, extract_tumor_ports
+from runner.models import Port, RunStatus
+from runner.serializers import APIRunCreateSerializer
+
+
+logger = logging.getLogger(__name__)
+
+class AccessSampleSheetOperator(Operator):
+    def get_jobs(self):
+        """
+        Convert job inputs into serialized jobs
+
+        :return: list[(serialized job info, Job)]
+        """
+        files = FileRepository.filter(queryset=self.files,
+                                      metadata={'requestId': self.request_id,
+                                                'igocomplete': True})
+
+        samples = []
+        for f in files:
+            if f.metadata["barcodeIndex"]:
+                barcodeIndex = f.metadata["barcodeIndex"].split("-")
+                index1 = barcodeIndex[0]
+                if len(barcodeIndex) > 1:
+                    index2 = barcodeIndex[1]
+
+            for lane in f.metadata["flowCellLanes"]:
+                samples.append({
+                    "lane": lane,
+                    "sample_id": f.metadata["sampleId"],
+                    "sample_plate": f.metadata["tumorOrNormal"],
+                    "sample_well": f.metadata["recipe"],
+                    "17_index_id": f.metadata["barcodeId"],
+                    "index": index1,
+                    "index2": index2,
+                    "sample_project": "Project_" + self.request_id,
+                    "description": f.metadata["dataAnalystName"]
+                })
+
+        inputs = [{
+            "samples": samples
+        }]
+
+        return [
+            (
+                APIRunCreateSerializer(
+                    data={
+                        'name': "Sample Sheet: %s, %i of %i" % (self.request_id, i + 1, len(inputs)),
+                        'app': self.get_pipeline_id(),
+                        'inputs': job,
+                        'tags': {
+                            'requestId': self.request_id
+                        }
+                    }
+                ),
+                job
+             )
+            for i, job in enumerate(inputs)
+        ]
diff --git a/runner/tests/operator/access/sample_sheet/__init__.py b/runner/tests/operator/access/sample_sheet/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
new file mode 100644
index 000000000..ea60ca5c6
--- /dev/null
+++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
@@ -0,0 +1,111 @@
+import os
+import json
+
+from django.test import TestCase
+
+from beagle.settings import ROOT_DIR
+from beagle_etl.models import Operator
+from runner.operator.operator_factory import OperatorFactory
+from runner.operator.access.v1_0_0.sample_sheet import AccessSampleSheetOperator
+
+
+COMMON_FIXTURES = [
+    'fixtures/tests/10075_D_single_TN_pair.file.json',
+    'fixtures/tests/10075_D_single_TN_pair.filemetadata.json',
+    'runner/fixtures/runner.pipeline.json',
+    'runner/fixtures/runner.run.json',
+    'runner/fixtures/runner.operator_run.json',
+    'file_system/fixtures/file_system.filegroup.json',
+    'file_system/fixtures/file_system.filetype.json',
+    'file_system/fixtures/file_system.storage.json',
+    'beagle_etl/fixtures/beagle_etl.operator.json',
+]
+
+
+class TestSampleSheeetOperator(TestCase):
+
+    fixtures = [os.path.join(ROOT_DIR, f) for f in COMMON_FIXTURES]
+
+    def test_sample_sheet_operator(self):
+        """
+        Test that an Access legacy SV operator instance can be created and validated
+        """
+        request_id = "10075_D"
+
+        operator_model = Operator.objects.get(slug="AccessSampleSheetOperator")
+        operator = OperatorFactory.get_by_model(operator_model, request_id=request_id)
+        self.assertEqual(operator.get_jobs()[0][0].is_valid(), True)
+        input_json = operator.get_jobs()[0][0].initial_data['inputs']
+        print(json.dumps(input_json))
+        self.assertEqual(input_json, {"samples": [{"lane": 3, "sample_id": "10075_D_5",
+                                                   "sample_plate": "Tumor", "sample_well":
+                                                   "IMPACT468", "17_index_id":
+                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
+                                                   "index2": "TTGTCGGT", "sample_project":
+                                                   "Project_10075_D", "description": ""},
+                                                  {"lane": 4, "sample_id": "10075_D_5",
+                                                   "sample_plate": "Tumor", "sample_well":
+                                                   "IMPACT468", "17_index_id":
+                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
+                                                   "index2": "TTGTCGGT", "sample_project":
+                                                   "Project_10075_D", "description": ""},
+                                                  {"lane": 3, "sample_id": "10075_D_5",
+                                                   "sample_plate": "Tumor", "sample_well":
+                                                   "IMPACT468", "17_index_id":
+                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
+                                                   "index2": "TTGTCGGT", "sample_project":
+                                                   "Project_10075_D", "description": ""},
+                                                  {"lane": 4, "sample_id": "10075_D_5",
+                                                   "sample_plate": "Tumor", "sample_well":
+                                                   "IMPACT468", "17_index_id":
+                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
+                                                   "index2": "TTGTCGGT", "sample_project":
+                                                   "Project_10075_D", "description": ""},
+                                                  {"lane": 1, "sample_id": "10075_D_3",
+                                                   "sample_plate": "Normal", "sample_well":
+                                                   "IMPACT468", "17_index_id": None, "index":
+                                                   "GTATTGGC", "index2": "TTGTCGGT",
+                                                   "sample_project": "Project_10075_D",
+                                                   "description": ""}, {"lane": 2, "sample_id":
+                                                                        "10075_D_3",
+                                                                        "sample_plate":
+                                                                        "Normal", "sample_well":
+                                                                        "IMPACT468",
+                                                                        "17_index_id": None,
+                                                                        "index": "GTATTGGC",
+                                                                        "index2": "TTGTCGGT",
+                                                                        "sample_project":
+                                                                        "Project_10075_D",
+                                                                        "description": ""},
+                                                  {"lane": 3, "sample_id": "10075_D_3",
+                                                   "sample_plate": "Normal", "sample_well":
+                                                   "IMPACT468", "17_index_id": None, "index":
+                                                   "GTATTGGC", "index2": "TTGTCGGT",
+                                                   "sample_project": "Project_10075_D",
+                                                   "description": ""}, {"lane": 1, "sample_id":
+                                                                        "10075_D_3",
+                                                                        "sample_plate":
+                                                                        "Normal", "sample_well":
+                                                                        "IMPACT468",
+                                                                        "17_index_id": None,
+                                                                        "index": "GTATTGGC",
+                                                                        "index2": "TTGTCGGT",
+                                                                        "sample_project":
+                                                                        "Project_10075_D",
+                                                                        "description": ""},
+                                                  {"lane": 2, "sample_id": "10075_D_3",
+                                                   "sample_plate": "Normal", "sample_well":
+                                                   "IMPACT468", "17_index_id": None, "index":
+                                                   "GTATTGGC", "index2": "TTGTCGGT",
+                                                   "sample_project": "Project_10075_D",
+                                                   "description": ""}, {"lane": 3, "sample_id":
+                                                                        "10075_D_3",
+                                                                        "sample_plate":
+                                                                        "Normal", "sample_well":
+                                                                        "IMPACT468",
+                                                                        "17_index_id": None,
+                                                                        "index": "GTATTGGC",
+                                                                        "index2": "TTGTCGGT",
+                                                                        "sample_project":
+                                                                        "Project_10075_D",
+                                                                        "description": ""}]})

From 371f82b48078cdee1679fcbe7eae21d0444266cc Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Wed, 19 May 2021 17:01:34 -0400
Subject: [PATCH 03/14] clean-up

---
 runner/operator/access/v1_0_0/sample_sheet/__init__.py       | 5 -----
 .../access/sample_sheet/test_sample_sheet_operator.py        | 4 ----
 2 files changed, 9 deletions(-)

diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
index 5ef186a08..a88932e43 100644
--- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -1,15 +1,10 @@
 import os
-import json
 import logging
-from jinja2 import Template
 
 from file_system.repository.file_repository import FileRepository
 from runner.operator.operator import Operator
-from runner.operator.access import get_request_id, get_request_id_runs, extract_tumor_ports
-from runner.models import Port, RunStatus
 from runner.serializers import APIRunCreateSerializer
 
-
 logger = logging.getLogger(__name__)
 
 class AccessSampleSheetOperator(Operator):
diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
index ea60ca5c6..80d77a7d5 100644
--- a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
+++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
@@ -27,11 +27,7 @@ class TestSampleSheeetOperator(TestCase):
     fixtures = [os.path.join(ROOT_DIR, f) for f in COMMON_FIXTURES]
 
     def test_sample_sheet_operator(self):
-        """
-        Test that an Access legacy SV operator instance can be created and validated
-        """
         request_id = "10075_D"
-
         operator_model = Operator.objects.get(slug="AccessSampleSheetOperator")
         operator = OperatorFactory.get_by_model(operator_model, request_id=request_id)
         self.assertEqual(operator.get_jobs()[0][0].is_valid(), True)

From fb0901a6f6f7d3d8e692c1f88088d47535136a01 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 21 May 2021 12:44:46 -0400
Subject: [PATCH 04/14] update sample sheets

---
 fixtures/tests/10075_D.filemetadata.json      | 10 +++
 .../10075_D_single_TN_pair.filemetadata.json  |  8 ++
 .../access/v1_0_0/sample_sheet/__init__.py    | 25 ++++---
 .../test_sample_sheet_operator.py             | 75 +------------------
 4 files changed, 34 insertions(+), 84 deletions(-)

diff --git a/fixtures/tests/10075_D.filemetadata.json b/fixtures/tests/10075_D.filemetadata.json
index 4f9e4012d..4dd09d3e8 100644
--- a/fixtures/tests/10075_D.filemetadata.json
+++ b/fixtures/tests/10075_D.filemetadata.json
@@ -18,6 +18,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_5",
+                "cmoSampleName": "s_C_0CREWW_L013_d",
                 "barcodeId": "DUAL_IDT_LIB_267",
                 "libraryId": "10075_D_5_1_1_1",
                 "patientId": "C-8VK0V7",
@@ -93,6 +94,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_4",
+                "cmoSampleName": "s_C_0CREWW_L012_d",
                 "barcodeId": "DUAL_IDT_LIB_255",
                 "libraryId": "10075_D_4_1_1_1",
                 "patientId": "C-DRKHP7",
@@ -168,6 +170,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_2",
+                "cmoSampleName": "s_C_0CREWW_L011_d",
                 "barcodeId": null,
                 "libraryId": "10075_D_2",
                 "patientId": "C-DRKHP7",
@@ -244,6 +247,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_3",
+                "cmoSampleName": "s_C_0CREWW_L010_d",
                 "barcodeId": null,
                 "libraryId": "10075_D_3",
                 "patientId": "C-8VK0V7",
@@ -320,6 +324,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_4",
+                "cmoSampleName": "s_C_0CREWW_L010_d",
                 "barcodeId": "DUAL_IDT_LIB_255",
                 "libraryId": null,
                 "patientId": "C-DRKHP7",
@@ -395,6 +400,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_2",
+                "cmoSampleName": "s_C_0CREWW_L009_d",
                 "barcodeId": null,
                 "libraryId": null,
                 "patientId": "C-DRKHP7",
@@ -471,6 +477,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_3",
+                "cmoSampleName": "s_C_0CREWW_L008_d",
                 "barcodeId": null,
                 "libraryId": null,
                 "patientId": "C-8VK0V7",
@@ -547,6 +554,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_5",
+                "cmoSampleName": "s_C_0CREWW_L007_d",
                 "barcodeId": "DUAL_IDT_LIB_267",
                 "libraryId": null,
                 "patientId": "C-8VK0V7",
@@ -622,6 +630,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_1",
+                "cmoSampleName": "s_C_0CREWW_L006_d",
                 "barcodeId": "DUAL_IDT_LIB_243",
                 "libraryId": "10075_D_1_1_1_1",
                 "patientId": "C-DRKHP7",
@@ -697,6 +706,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_1",
+                "cmoSampleName": "s_C_0CREWW_L005_d",
                 "barcodeId": "DUAL_IDT_LIB_243",
                 "libraryId": null,
                 "patientId": "C-DRKHP7",
diff --git a/fixtures/tests/10075_D_single_TN_pair.filemetadata.json b/fixtures/tests/10075_D_single_TN_pair.filemetadata.json
index a225f294a..878e30ce9 100644
--- a/fixtures/tests/10075_D_single_TN_pair.filemetadata.json
+++ b/fixtures/tests/10075_D_single_TN_pair.filemetadata.json
@@ -19,6 +19,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_5",
+                "cmoSampleName": "s_C_0CREWW_L013_d",
                 "barcodeId": "DUAL_IDT_LIB_267",
                 "libraryId": "10075_D_5_1_1_1",
                 "patientId": "C-8VK0V7",
@@ -31,6 +32,7 @@
                 "captureName": "Pool-09483_R-10075_D-Tube7_1",
                 "igocomplete": true,
                 "labHeadName": "John Smith",
+                "dnaInputNg": 12.0,
                 "barcodeIndex": "GTATTGGC-TTGTCGGT",
                 "labHeadEmail": "email@internet.com",
                 "oncoTreeCode": "MEL",
@@ -95,6 +97,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_5",
+                "cmoSampleName": "s_C_0CREWW_L012_d",
                 "barcodeId": "DUAL_IDT_LIB_267",
                 "libraryId": null,
                 "patientId": "C-8VK0V7",
@@ -107,6 +110,7 @@
                 "captureName": "Pool-09483_R-10075_D-Tube7_1",
                 "igocomplete": true,
                 "labHeadName": "John Smith",
+                "dnaInputNg": 12.0,
                 "barcodeIndex": "GTATTGGC-TTGTCGGT",
                 "labHeadEmail": "email@internet.com",
                 "oncoTreeCode": "MEL",
@@ -171,6 +175,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_3",
+                "cmoSampleName": "s_C_0CREWW_L011_d",
                 "barcodeId": null,
                 "libraryId": "10075_D_3",
                 "patientId": "C-8VK0V7",
@@ -183,6 +188,7 @@
                 "captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
                 "igocomplete": true,
                 "labHeadName": "John Smith",
+                "dnaInputNg": 12.0,
                 "barcodeIndex": null,
                 "labHeadEmail": "email@internet.com",
                 "oncoTreeCode": null,
@@ -248,6 +254,7 @@
                 "runMode": "HiSeq High Output",
                 "species": "Human",
                 "sampleId": "10075_D_3",
+                "cmoSampleName": "s_C_0CREWW_L010_d",
                 "barcodeId": null,
                 "libraryId": null,
                 "patientId": "C-8VK0V7",
@@ -260,6 +267,7 @@
                 "captureName": "Pool-05257_CD-06287_AY-10075_D-Tube2_1",
                 "igocomplete": true,
                 "labHeadName": "John Smith",
+                "dnaInputNg": 12.0,
                 "barcodeIndex": null,
                 "labHeadEmail": "email@internet.com",
                 "oncoTreeCode": null,
diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
index a88932e43..ee943de92 100644
--- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -23,20 +23,23 @@ def get_jobs(self):
             if f.metadata["barcodeIndex"]:
                 barcodeIndex = f.metadata["barcodeIndex"].split("-")
                 index1 = barcodeIndex[0]
-                if len(barcodeIndex) > 1:
-                    index2 = barcodeIndex[1]
 
             for lane in f.metadata["flowCellLanes"]:
                 samples.append({
-                    "lane": lane,
-                    "sample_id": f.metadata["sampleId"],
-                    "sample_plate": f.metadata["tumorOrNormal"],
-                    "sample_well": f.metadata["recipe"],
-                    "17_index_id": f.metadata["barcodeId"],
-                    "index": index1,
-                    "index2": index2,
-                    "sample_project": "Project_" + self.request_id,
-                    "description": f.metadata["dataAnalystName"]
+                    "Lane": lane,
+                    "SampleID": f.metadata["cmoSampleName"][2:].replace("_", "-"),
+                    "SampleRef": f.metadata["species"],
+                    "Index": index1,
+                    "Description": f.metadata["tumorOrNormal"],
+                    "Control": "N",
+                    "Recipe": f.metadata["recipe"],
+                    "Operator": "|".join(["AR", "-;-", f.metadata["sampleId"], f.metadata["sex"], "NOVASEQ"]),
+                    "SampleProject": "Project_" + self.request_id,
+                    "DnaInputNg": f.metadata["dnaInputNg"],
+                    "CaptureInputNg": f.metadata["captureInputNg"],
+                    "LibraryVolume": f.metadata["libraryVolume"],
+                    "PatientId": f.metadata["patientId"],
+                    "IgoID": f.metadata["sampleId"],
                 })
 
         inputs = [{
diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
index 80d77a7d5..f5f3de994 100644
--- a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
+++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
@@ -32,76 +32,5 @@ def test_sample_sheet_operator(self):
         operator = OperatorFactory.get_by_model(operator_model, request_id=request_id)
         self.assertEqual(operator.get_jobs()[0][0].is_valid(), True)
         input_json = operator.get_jobs()[0][0].initial_data['inputs']
-        print(json.dumps(input_json))
-        self.assertEqual(input_json, {"samples": [{"lane": 3, "sample_id": "10075_D_5",
-                                                   "sample_plate": "Tumor", "sample_well":
-                                                   "IMPACT468", "17_index_id":
-                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
-                                                   "index2": "TTGTCGGT", "sample_project":
-                                                   "Project_10075_D", "description": ""},
-                                                  {"lane": 4, "sample_id": "10075_D_5",
-                                                   "sample_plate": "Tumor", "sample_well":
-                                                   "IMPACT468", "17_index_id":
-                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
-                                                   "index2": "TTGTCGGT", "sample_project":
-                                                   "Project_10075_D", "description": ""},
-                                                  {"lane": 3, "sample_id": "10075_D_5",
-                                                   "sample_plate": "Tumor", "sample_well":
-                                                   "IMPACT468", "17_index_id":
-                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
-                                                   "index2": "TTGTCGGT", "sample_project":
-                                                   "Project_10075_D", "description": ""},
-                                                  {"lane": 4, "sample_id": "10075_D_5",
-                                                   "sample_plate": "Tumor", "sample_well":
-                                                   "IMPACT468", "17_index_id":
-                                                   "DUAL_IDT_LIB_267", "index": "GTATTGGC",
-                                                   "index2": "TTGTCGGT", "sample_project":
-                                                   "Project_10075_D", "description": ""},
-                                                  {"lane": 1, "sample_id": "10075_D_3",
-                                                   "sample_plate": "Normal", "sample_well":
-                                                   "IMPACT468", "17_index_id": None, "index":
-                                                   "GTATTGGC", "index2": "TTGTCGGT",
-                                                   "sample_project": "Project_10075_D",
-                                                   "description": ""}, {"lane": 2, "sample_id":
-                                                                        "10075_D_3",
-                                                                        "sample_plate":
-                                                                        "Normal", "sample_well":
-                                                                        "IMPACT468",
-                                                                        "17_index_id": None,
-                                                                        "index": "GTATTGGC",
-                                                                        "index2": "TTGTCGGT",
-                                                                        "sample_project":
-                                                                        "Project_10075_D",
-                                                                        "description": ""},
-                                                  {"lane": 3, "sample_id": "10075_D_3",
-                                                   "sample_plate": "Normal", "sample_well":
-                                                   "IMPACT468", "17_index_id": None, "index":
-                                                   "GTATTGGC", "index2": "TTGTCGGT",
-                                                   "sample_project": "Project_10075_D",
-                                                   "description": ""}, {"lane": 1, "sample_id":
-                                                                        "10075_D_3",
-                                                                        "sample_plate":
-                                                                        "Normal", "sample_well":
-                                                                        "IMPACT468",
-                                                                        "17_index_id": None,
-                                                                        "index": "GTATTGGC",
-                                                                        "index2": "TTGTCGGT",
-                                                                        "sample_project":
-                                                                        "Project_10075_D",
-                                                                        "description": ""},
-                                                  {"lane": 2, "sample_id": "10075_D_3",
-                                                   "sample_plate": "Normal", "sample_well":
-                                                   "IMPACT468", "17_index_id": None, "index":
-                                                   "GTATTGGC", "index2": "TTGTCGGT",
-                                                   "sample_project": "Project_10075_D",
-                                                   "description": ""}, {"lane": 3, "sample_id":
-                                                                        "10075_D_3",
-                                                                        "sample_plate":
-                                                                        "Normal", "sample_well":
-                                                                        "IMPACT468",
-                                                                        "17_index_id": None,
-                                                                        "index": "GTATTGGC",
-                                                                        "index2": "TTGTCGGT",
-                                                                        "sample_project":
-                                                                        "Project_10075_D",
-                                                                        "description": ""}]})
+        #print(json.dumps(input_json))
+        self.assertEqual(input_json, {"samples": [{"Lane": 3, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 3, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 1, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 1, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}]})

From 1a472956e8cf0d35e7d48d9cb231e7194d3555e3 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 21 May 2021 15:09:54 -0400
Subject: [PATCH 05/14] change Id to ID

---
 runner/operator/access/v1_0_0/sample_sheet/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
index ee943de92..c719847f4 100644
--- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -38,7 +38,7 @@ def get_jobs(self):
                     "DnaInputNg": f.metadata["dnaInputNg"],
                     "CaptureInputNg": f.metadata["captureInputNg"],
                     "LibraryVolume": f.metadata["libraryVolume"],
-                    "PatientId": f.metadata["patientId"],
+                    "PatientID": f.metadata["patientId"],
                     "IgoID": f.metadata["sampleId"],
                 })
 

From 59b8e3d9793f46944707faef9cf3fbcd44b8e0a0 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 21 May 2021 15:10:25 -0400
Subject: [PATCH 06/14] update tests

---
 .../operator/access/sample_sheet/test_sample_sheet_operator.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
index f5f3de994..d96bc0f5a 100644
--- a/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
+++ b/runner/tests/operator/access/sample_sheet/test_sample_sheet_operator.py
@@ -33,4 +33,4 @@ def test_sample_sheet_operator(self):
         self.assertEqual(operator.get_jobs()[0][0].is_valid(), True)
         input_json = operator.get_jobs()[0][0].initial_data['inputs']
         #print(json.dumps(input_json))
-        self.assertEqual(input_json, {"samples": [{"Lane": 3, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 3, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientId": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 1, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 1, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientId": "C-8VK0V7", "IgoID": "10075_D_3"}]})
+        self.assertEqual(input_json, {"samples": [{"Lane": 3, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L013-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 3, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 4, "SampleID": "C-0CREWW-L012-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Tumor", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_5|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": 35.0, "PatientID": "C-8VK0V7", "IgoID": "10075_D_5"}, {"Lane": 1, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L011-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 1, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 2, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}, {"Lane": 3, "SampleID": "C-0CREWW-L010-d", "SampleRef": "Human", "Index": "GTATTGGC", "Description": "Normal", "Control": "N", "Recipe": "IMPACT468", "Operator": "AR|-;-|10075_D_3|F|NOVASEQ", "SampleProject": "Project_10075_D", "DnaInputNg": 12.0, "CaptureInputNg": "110.0", "LibraryVolume": None, "PatientID": "C-8VK0V7", "IgoID": "10075_D_3"}]})

From 2bbc4581ed207015526929b970e8e392294693b5 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 21 May 2021 15:44:17 -0400
Subject: [PATCH 07/14] update sample name

---
 runner/operator/access/v1_0_0/sample_sheet/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
index c719847f4..e8cef87f7 100644
--- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -27,7 +27,7 @@ def get_jobs(self):
             for lane in f.metadata["flowCellLanes"]:
                 samples.append({
                     "Lane": lane,
-                    "SampleID": f.metadata["cmoSampleName"][2:].replace("_", "-"),
+                    "SampleID": f.metadata["sampleName"],
                     "SampleRef": f.metadata["species"],
                     "Index": index1,
                     "Description": f.metadata["tumorOrNormal"],

From 6d2f3c01f606944d3b65a3022d901a333ca7c258 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Thu, 26 Aug 2021 12:44:47 -0400
Subject: [PATCH 08/14] add request id to metadata for output

---
 runner/operator/access/v1_0_0/sample_sheet/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
index e8cef87f7..d3e1fc84c 100644
--- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -53,6 +53,9 @@ def get_jobs(self):
                         'name': "Sample Sheet: %s, %i of %i" % (self.request_id, i + 1, len(inputs)),
                         'app': self.get_pipeline_id(),
                         'inputs': job,
+                        'output_metadata': {
+                            'requestId': self.request_id
+                        },
                         'tags': {
                             'requestId': self.request_id
                         }

From 78d74689f5b3c0319a92d6ff032472f2ea6c29b1 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Thu, 26 Aug 2021 15:23:01 -0400
Subject: [PATCH 09/14] add mpath submitter

---
 beagle/settings.py                            |  2 +
 .../access/v1_0_0/mpath_submitter/__init__.py | 91 +++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 runner/operator/access/v1_0_0/mpath_submitter/__init__.py

diff --git a/beagle/settings.py b/beagle/settings.py
index b5d326ccf..7fdcf737b 100644
--- a/beagle/settings.py
+++ b/beagle/settings.py
@@ -322,6 +322,8 @@
 
 RIDGEBACK_URL = os.environ.get('BEAGLE_RIDGEBACK_URL', 'http://localhost:5003')
 
+MPATH_URL = os.environ.get('BEAGLE_MPATH_URL', 'http://localhost:7331')
+
 LOG_PATH = os.environ.get('BEAGLE_LOG_PATH', 'beagle-server.log')
 
 LOGGING = {
diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
new file mode 100644
index 000000000..f0682d768
--- /dev/null
+++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -0,0 +1,91 @@
+import requests
+
+from beagle.settings import MPATH_URL
+from runner.operator.operator import Operator
+from runner.models import PortType, Run, RunStatus, File
+
+WORKFLOW_NAME_TO_MPATH_TYPE = {
+    "access legacy MSI": "microsatellite_instability",
+    "access legacy SNV": "snp_indel_variants",
+    "access legacy SV": "structural_variants",
+    "access legacy CNV": "copy_number_variants",
+}
+
+WORKFLOW_NAME_TO_MPATH_LOCATION_KEY = {
+    "access legacy MSI": "msi_fs_location",
+    "access legacy SNV": "snp_fs_location",
+    "access legacy SV": "sv_fs_location",
+    "access legacy CNV": "cnv_fs_location",
+}
+
+
+
+def get_sample_sheet(job_group_id):
+    sample_sheet_run = Run.objects.filter(
+        job_group_id=job_group_id,
+        run__app__name="sample sheet",
+        run__status=RunStatus.COMPLETED
+    ).order_by('-created_date').first()
+
+    sample_sheet = File.objects.filter(
+        port__run=sample_sheet_run,
+        port__port_type=PortType.OUTPUT
+    ).first()
+
+    return sample_sheet.path
+
+
+"""
+For additional information on how Voyager results are mounted on MPath server see:
+https://app.gitbook.com/@mskcc-1/s/voyager/mpath/commands
+"""
+def juno_path_to_mpath(path):
+    [_, p] = path.split("/voyager")
+    return "/voyager" + p
+
+
+def submit_to_mpath(workflow_name, files, sample_sheet_path):
+    mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name]
+    location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name]
+
+    data = {
+        "dmp_alys_task_name": "Project_<Request_ID>",
+        "ss_location": [
+            juno_path_to_mpath(sample_sheet_path)
+        ],
+        "options": [
+            mpath_type
+        ],
+    }
+    data[location_key] = [juno_path_to_mpath(f.path) for f in files]
+
+    payload = {
+        "data": [data]
+    }
+
+    requests.post(MPATH_URL + "/ngs/", json=payload)
+
+
+def get_files(runs):
+    File.objects.filter(
+        port__run__in=runs,
+        port__run__status=RunStatus.COMPLETED,
+        port__port_type=PortType.OUTPUT
+    ).all()
+
+
+class AccessMPathSubmitter(Operator):
+    def get_jobs(self):
+        runs = Run.objects.filter(id__in=self.run_ids)
+        meta_run = runs[0]
+
+        pipeline_name = meta_run.pipeline.name
+        job_group_id = meta_run.job_group_id
+
+        sample_sheet_path = get_sample_sheet(job_group_id)
+
+        files = get_files(runs)
+
+        submit_to_mpath(pipeline_name, files, sample_sheet_path)
+
+        return []

From 86eeca7ab52b140f05c498a8ebf05901b35b988c Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Thu, 26 Aug 2021 22:37:12 -0400
Subject: [PATCH 10/14] some tweaks

---
 .../access/v1_0_0/mpath_submitter/__init__.py | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
index f0682d768..9dff75577 100644
--- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
+++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -1,3 +1,10 @@
+"""
+" This operator submits each downstream workflow to MPath
+" An operator trigger, for each downstream workflow (MSI/CNV/SV/SNV),
+" _when all runs are complete_, should be created.
+" For additional information on the API and how MPath ACCESS server
+" is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/
+"""
 import requests
 
 from beagle.settings import MPATH_URL
@@ -23,8 +30,8 @@
 def get_sample_sheet(job_group_id):
     sample_sheet_run = Run.objects.filter(
         job_group_id=job_group_id,
-        run__app__name="sample sheet",
-        run__status=RunStatus.COMPLETED
+        app__name="sample sheet",
+        status=RunStatus.COMPLETED
     ).order_by('-created_date').first()
 
     sample_sheet = File.objects.filter(
@@ -35,21 +42,17 @@ def get_sample_sheet(job_group_id):
     return sample_sheet.path
 
 
-"""
-For additional information on how Voyager results are mounted on MPath server see:
-https://app.gitbook.com/@mskcc-1/s/voyager/mpath/commands
-"""
 def juno_path_to_mpath(path):
     [_, p] = path.split("/voyager")
     return "/voyager" + p
 
 
-def submit_to_mpath(workflow_name, files, sample_sheet_path):
+def submit_to_mpath(request_id, workflow_name, files, sample_sheet_path):
     mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name]
     location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name]
 
     data = {
-        "dmp_alys_task_name": "Project_<Request_ID>",
+        "dmp_alys_task_name": "Project_" + request_id,
         "ss_location": [
             juno_path_to_mpath(sample_sheet_path)
         ],
@@ -67,7 +70,7 @@ def submit_to_mpath(workflow_name, files, sample_sheet_path):
 
 
 def get_files(runs):
-    File.objects.filter(
+    return File.objects.filter(
         port__run__in=runs,
         port__run__status=RunStatus.COMPLETED,
         port__port_type=PortType.OUTPUT
@@ -79,13 +82,14 @@ def get_jobs(self):
         runs = Run.objects.filter(id__in=self.run_ids)
         meta_run = runs[0]
 
-        pipeline_name = meta_run.pipeline.name
+        request_id = meta_run.metadata["requestId"]
+        pipeline_name = meta_run.app.name
         job_group_id = meta_run.job_group_id
 
         sample_sheet_path = get_sample_sheet(job_group_id)
 
         files = get_files(runs)
 
-        submit_to_mpath(pipeline_name, files, sample_sheet_path)
+        submit_to_mpath(request_id, pipeline_name, files, sample_sheet_path)
 
         return []

From ab1e7a6df4c81c6c9114cdbcd7f63a33009e6f29 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 27 Aug 2021 09:58:35 -0400
Subject: [PATCH 11/14] updates to mpath submitter

---
 .../access/v1_0_0/mpath_submitter/__init__.py | 37 +++++++++++++++++--
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
index 9dff75577..066a5a6f2 100644
--- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
+++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -12,7 +12,7 @@
 from runner.models import PortType, Run, RunStatus, File
 
 WORKFLOW_NAME_TO_MPATH_TYPE = {
-    "access legacy MSI": "microsatellite_instability",
+    "access legacy MSI": "admie_microsatellite_instability",
     "access legacy SNV": "snp_indel_variants",
     "access legacy SV": "structural_variants",
     "access legacy CNV": "copy_number_variants",
@@ -47,7 +47,31 @@ def juno_path_to_mpath(path):
     return "/voyager" + p
 
 
-def submit_to_mpath(request_id, workflow_name, files, sample_sheet_path):
+# This will return 400 bad request if the project already exists.
+def submit_project(request_id):
+    payload = {
+        "data": [
+            {
+                "comments": "",
+                "dmp_alys_task_name": "Project_" + request_id,
+                "dmp_alys_task_type_cv_id": 7,
+                # TODO
+                "analyst_cv_id": None,
+                "dmp_dms_at_id": None,
+                "dmp_dms_id": None,
+                "dmp_lims_id": None,
+                "fellow_cv_id": None,
+                "fs_location": "N/A",
+                "is_clinical": 0,
+                "pathologist_cv_id": None
+            }
+        ]
+    }
+
+    requests.post(MPATH_URL + "/ngs/projects", json=payload)
+
+
+def submit_workflow(request_id, workflow_name, files, sample_sheet_path):
     mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name]
     location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name]
 
@@ -56,8 +80,12 @@ def submit_to_mpath(request_id, workflow_name, files, sample_sheet_path):
         "ss_location": [
             juno_path_to_mpath(sample_sheet_path)
         ],
+        # This shouldn't be required. We can't use READONLY dirs so pointing to 
+        # /voyager does not work. Talk with Anoop on what should be done.
+        "fs_location": "/srv",
         "options": [
-            mpath_type
+            mpath_type,
+            "samples"
         ],
     }
     data[location_key] = [juno_path_to_mpath(f.path) for f in files]
@@ -90,6 +118,7 @@ def get_jobs(self):
 
         files = get_files(runs)
 
-        submit_to_mpath(request_id, pipeline_name, files, sample_sheet_path)
+        submit_project(request_id)
+        submit_workflow(request_id, pipeline_name, files, sample_sheet_path)
 
         return []

From a29007072c6371b955525f68ad560f69eeebe23b Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 27 Aug 2021 10:11:50 -0400
Subject: [PATCH 12/14] updates to mpath submitter

---
 .../access/v1_0_0/mpath_submitter/__init__.py  | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
index 066a5a6f2..27d764159 100644
--- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
+++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -27,11 +27,21 @@
 
 
 
-def get_sample_sheet(job_group_id):
+def get_sample_sheet(request_id, job_group_id):
     sample_sheet_run = Run.objects.filter(
-        job_group_id=job_group_id,
         app__name="sample sheet",
-        status=RunStatus.COMPLETED
+        status=RunStatus.COMPLETED,
+
+        # Using job_group_id is better but in order to trigger MPath Submitter
+        # for the massive backlog where Sample Sheet was generated in a different
+        # job_group we have to use request ID
+        # To trigger this for the backlog see:
+        # https://app.gitbook.com/@mskcc-1/s/voyager/debugging/using-the-django-shell
+        # Once the backlog is submitted this should be reverted
+
+        # job_group_id=job_group_id,
+
+        tags__requestId=request_id
     ).order_by('-created_date').first()
 
     sample_sheet = File.objects.filter(
@@ -114,7 +124,7 @@ def get_jobs(self):
         pipeline_name = meta_run.app.name
         job_group_id = meta_run.job_group_id
 
-        sample_sheet_path = get_sample_sheet(job_group_id)
+        sample_sheet_path = get_sample_sheet(request_id, job_group_id)
 
         files = get_files(runs)
 

From f214ce62f513be28e3314b632b7141c3f7b070b3 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 27 Aug 2021 12:25:44 -0400
Subject: [PATCH 13/14] rename project name so MPath can pick it up

---
 runner/operator/access/v1_0_0/mpath_submitter/__init__.py | 4 ++--
 runner/operator/access/v1_0_0/sample_sheet/__init__.py    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
index 27d764159..3fe02242d 100644
--- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
+++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -63,7 +63,7 @@ def submit_project(request_id):
         "data": [
             {
                 "comments": "",
-                "dmp_alys_task_name": "Project_" + request_id,
+                "dmp_alys_task_name": "ACCESSv1-" + request_id,
                 "dmp_alys_task_type_cv_id": 7,
                 # TODO
                 "analyst_cv_id": None,
@@ -86,7 +86,7 @@ def submit_workflow(request_id, workflow_name, files, sample_sheet_path):
     location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name]
 
     data = {
-        "dmp_alys_task_name": "Project_" + request_id,
+        "dmp_alys_task_name": "ACCESSv1-" + request_id,
         "ss_location": [
             juno_path_to_mpath(sample_sheet_path)
         ],
diff --git a/runner/operator/access/v1_0_0/sample_sheet/__init__.py b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
index d3e1fc84c..25683e6ec 100644
--- a/runner/operator/access/v1_0_0/sample_sheet/__init__.py
+++ b/runner/operator/access/v1_0_0/sample_sheet/__init__.py
@@ -34,7 +34,7 @@ def get_jobs(self):
                     "Control": "N",
                     "Recipe": f.metadata["recipe"],
                     "Operator": "|".join(["AR", "-;-", f.metadata["sampleId"], f.metadata["sex"], "NOVASEQ"]),
-                    "SampleProject": "Project_" + self.request_id,
+                    "SampleProject": "ACCESSv1-" + self.request_id,
                     "DnaInputNg": f.metadata["dnaInputNg"],
                     "CaptureInputNg": f.metadata["captureInputNg"],
                     "LibraryVolume": f.metadata["libraryVolume"],

From cdef142ae0fa18e3a4590d7c4991e4d9d89d2db4 Mon Sep 17 00:00:00 2001
From: Adrian Fraiha <aef@fastmail.com>
Date: Fri, 27 Aug 2021 13:00:50 -0400
Subject: [PATCH 14/14] updates

---
 .../access/v1_0_0/mpath_submitter/__init__.py | 29 ++++++++++++-------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
index 3fe02242d..a3efcebd4 100644
--- a/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
+++ b/runner/operator/access/v1_0_0/mpath_submitter/__init__.py
@@ -1,6 +1,6 @@
 """
-" This operator submits each downstream workflow to MPath
-" An operator trigger, for each downstream workflow (MSI/CNV/SV/SNV),
+" This operator submits each downstream pipeline to MPath
+" An operator trigger, for each downstream pipeline (MSI/CNV/SV/SNV),
 " _when all runs are complete_, should be created.
 " For additional information on the API and how MPath ACCESS server
 " is set-up, see https://app.gitbook.com/@mskcc-1/s/voyager/mpath/
@@ -11,20 +11,26 @@
 from runner.operator.operator import Operator
 from runner.models import PortType, Run, RunStatus, File
 
-WORKFLOW_NAME_TO_MPATH_TYPE = {
+PIPELINE_NAME_TO_MPATH_TYPE = {
     "access legacy MSI": "admie_microsatellite_instability",
     "access legacy SNV": "snp_indel_variants",
     "access legacy SV": "structural_variants",
     "access legacy CNV": "copy_number_variants",
 }
 
-WORKFLOW_NAME_TO_MPATH_LOCATION_KEY = {
+PIPELINE_NAME_TO_MPATH_LOCATION_KEY = {
     "access legacy MSI": "msi_fs_location",
     "access legacy SNV": "snp_fs_location",
     "access legacy SV": "sv_fs_location",
     "access legacy CNV": "cnv_fs_location",
 }
 
+PIPELINE_NAME_TO_FILES = {
+    "access legacy MSI": ["msi_results.txt"],
+    "access legacy SNV": [], #TODO
+    "access legacy SV": [], 
+    "access legacy CNV": []
+}
 
 
 def get_sample_sheet(request_id, job_group_id):
@@ -81,9 +87,9 @@ def submit_project(request_id):
     requests.post(MPATH_URL + "/ngs/projects", json=payload)
 
 
-def submit_workflow(request_id, workflow_name, files, sample_sheet_path):
-    mpath_type = WORKFLOW_NAME_TO_MPATH_TYPE[workflow_name]
-    location_key = WORKFLOW_NAME_TO_MPATH_LOCATION_KEY[workflow_name]
+def submit_pipeline(request_id, pipeline_name, files, sample_sheet_path):
+    mpath_type = PIPELINE_NAME_TO_MPATH_TYPE[pipeline_name]
+    location_key = PIPELINE_NAME_TO_MPATH_LOCATION_KEY[pipeline_name]
 
     data = {
         "dmp_alys_task_name": "ACCESSv1-" + request_id,
@@ -107,11 +113,12 @@ def submit_workflow(request_id, workflow_name, files, sample_sheet_path):
     requests.post(MPATH_URL + "/ngs/", json=payload)
 
 
-def get_files(runs):
+def get_files(pipeline_name, runs):
     return File.objects.filter(
         port__run__in=runs,
         port__run__status=RunStatus.COMPLETED,
-        port__port_type=PortType.OUTPUT
+        port__port_type=PortType.OUTPUT,
+        file_name__in=PIPELINE_NAME_TO_FILES[pipeline_name]
     ).all()
 
 
@@ -126,9 +133,9 @@ def get_jobs(self):
 
         sample_sheet_path = get_sample_sheet(request_id, job_group_id)
 
-        files = get_files(runs)
+        files = get_files(pipeline_name, runs)
 
         submit_project(request_id)
-        submit_workflow(request_id, pipeline_name, files, sample_sheet_path)
+        submit_pipeline(request_id, pipeline_name, files, sample_sheet_path)
 
         return []