Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Outputs metadata #135

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ check-env:
BEAGLE_AUTH_LDAP_SERVER_URI \
BEAGLE_LIMS_PASSWORD \
BEAGLE_LIMS_USERNAME; do \
[ -z "$$(printenv BEAGLE_LIMS_USERNAME)" ] && echo ">>> env variable $$i is not set; some features may not work" || : ; done
[ -z "$$(printenv $$i)" ] && echo ">>> env variable $$i is not set; some features may not work" || : ; done

# start the RabbitMQ server in the background
rabbitmq-start: $(LOG_DIR_ABS)
Expand Down Expand Up @@ -436,10 +436,11 @@ file-get:
http://$(DJANGO_BEAGLE_IP):$(DJANGO_BEAGLE_PORT)/v0/fs/files/?filename=$(REQFILE)

# start a Roslin run for a given request in the Beagle db
run-request:
run-request: $(AUTH_FILE)
@token=$$( jq -r '.token' "$(AUTH_FILE)" ) && \
curl -H "Content-Type: application/json" \
-X POST \
-H "Authorization: Bearer $(TOKEN)" \
-H "Authorization: Bearer $$token" \
--data '{"request_ids":["$(REQID)"], "pipeline_name": "roslin"}' \
http://$(DJANGO_BEAGLE_IP):$(DJANGO_BEAGLE_PORT)/v0/run/request/

Expand Down Expand Up @@ -467,12 +468,20 @@ $(DEMO_INPUT): $(INPUT_TEMPLATE) $(AUTH_FILE)
.PHONY: $(DEMO_INPUT)

# submit a demo Roslin run using the dev Roslin pipeline entry in the database
demo-run: register-dev-pipeline $(DEMO_INPUT)
# submit using the API endpoint; bypasses the Operator
# Prerequisites: register-dev-pipeline and $(DEMO_INPUT).
# Loads three fixtures with `manage.py loaddata` (the juno_roslin_demo2 file and
# filemetadata fixtures plus roslin_reference_files), then invokes the
# run-request-api target with REQID=DemoRequest1 and REQJSON=$(DEMO_INPUT).
demo-run-api: register-dev-pipeline $(DEMO_INPUT)
@python manage.py loaddata fixtures/tests/juno_roslin_demo2.file.json
@python manage.py loaddata fixtures/tests/juno_roslin_demo2.filemetadata.json
@python manage.py loaddata fixtures/tests/roslin_reference_files.json
@$(MAKE) run-request-api REQID=DemoRequest1 REQJSON=$(DEMO_INPUT)

# submit using standard request; uses the Operator
# Prerequisites: register-dev-pipeline and $(DEMO_INPUT).
# Loads the same three fixtures as demo-run-api with `manage.py loaddata`, then
# invokes the run-request target with REQID=DemoRequest1.
# Note: the final $(MAKE) line has no leading `@`, so make echoes that command.
demo-run: register-dev-pipeline $(DEMO_INPUT)
@python manage.py loaddata fixtures/tests/juno_roslin_demo2.file.json
@python manage.py loaddata fixtures/tests/juno_roslin_demo2.filemetadata.json
@python manage.py loaddata fixtures/tests/roslin_reference_files.json
$(MAKE) run-request REQID=DemoRequest1

# check if the ports needed for services and servers are already in use on this system
ifeq ($(UNAME), Darwin)
# On macOS High Sierra, use this command: lsof -nP -i4TCP:$PORT | grep LISTEN
Expand Down
38 changes: 28 additions & 10 deletions runner/operator/roslin_operator/construct_roslin_pair.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from .bin.make_sample import remove_with_caveats
from .bin.pair_request import compile_pairs


class InvalidAssay(Exception):
    """Raised when an assay label cannot be resolved to a target assay."""

# TODO: generalize
def load_references():
Expand Down Expand Up @@ -50,7 +51,6 @@ def format_sample(data):

return sample


def construct_roslin_jobs(samples):
samples, error_samples = remove_with_caveats(samples)
pairs = compile_pairs(samples)
Expand All @@ -67,7 +67,18 @@ def construct_roslin_jobs(samples):
job['pair'] = [tumor_sample, normal_sample]
references = convert_references(project_id, assay)
job.update(references)
roslin_jobs.append(job)

job_metadata = {}
job_metadata['assay'] = assay
job_metadata['request_id'] = project_id
job_metadata['tumor'] = {}
job_metadata['tumor']['igo_id'] = tumor['igo_id']
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@allanbolipata @sivkovic which identifier are we using inside the Roslin pipelines? IGO ID or Patient ID or some other ID?

job_metadata['tumor']['patient_id'] = tumor['patient_id']
job_metadata['normal'] = {}
job_metadata['normal']['igo_id'] = normal['igo_id']
job_metadata['normal']['patient_id'] = normal['patient_id']

roslin_jobs.append((job, job_metadata))
return roslin_jobs, error_samples


Expand All @@ -85,13 +96,11 @@ def get_curated_bams(assay,request_files):
array.append({'class': 'File', 'location': str(bam)})
return array


def get_baits_and_targets(assay, roslin_resources):
# probably need similar rules for whatever "Exome" string is in request
targets = roslin_resources['targets']

target_assay = assay

def get_target_assay(assay):
"""
Return a target assay label for a provided assay; the provided assay may not exactly match the desired target assay so resolve it here to the desired output value
"""
target_assay = None
if assay.find("IMPACT410") > -1:
target_assay = "IMPACT410_b37"
if assay.find("IMPACT468") > -1:
Expand All @@ -104,6 +113,15 @@ def get_baits_and_targets(assay, roslin_resources):
target_assay = "IMPACT468_08390"
if assay.find("IMPACT468+Poirier_RB1_intron_V2") > -1:
target_assay = "IMPACT468_08050"
if target_assay == None:
raise InvalidAssay(assay)
return(target_assay)

def get_baits_and_targets(assay, roslin_resources):
# probably need similar rules for whatever "Exome" string is in request
targets = roslin_resources['targets']

target_assay = get_target_assay(assay)

if target_assay in targets:
return {"bait_intervals": {"class": "File", 'location': str(targets[target_assay]['baits_list'])},
Expand Down
27 changes: 23 additions & 4 deletions runner/operator/roslin_operator/roslin_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@
from .construct_roslin_pair import construct_roslin_jobs
from .bin.pair_request import compile_pairs
from .bin.make_sample import build_sample
from pprint import pprint

import beagle_etl.celery

# NOTE(review): this overrides the Celery app configuration as a side effect of
# importing this module. It looks like a debugging leftover -- confirm whether
# it should ship, or move it into test/dev settings.
beagle_etl.celery.app.conf['task_always_eager'] = True

# Append a "module loaded" marker to debug.log (relative to the current working
# directory). The context manager closes the handle right away; the previous
# inline open() call left it open until garbage collection.
with open("debug.log", "a") as debug_log:
    pprint("roslin_operator module loaded...", stream = debug_log)

class RoslinOperator(Operator):

Expand Down Expand Up @@ -43,11 +48,25 @@ def get_jobs(self):
roslin_inputs, error_samples = construct_roslin_jobs(samples)
number_of_inputs = len(roslin_inputs)

for i, job in enumerate(roslin_inputs):
for i, job_items in enumerate(roslin_inputs):
job = job_items[0]
job_metadata = job_items[1]
tumor_sample_name = job['pair'][0]['ID']
normal_sample_name = job['pair'][1]['ID']
name = "ROSLIN %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
roslin_jobs.append((APIRunCreateSerializer(
data={'app': self.get_pipeline_id(), 'inputs': roslin_inputs, 'name': name,
'tags': {'requestId': self.request_id}}), job))
data = {
'app': self.get_pipeline_id(),
'inputs': roslin_inputs,
'name': name,
'tags': {'requestId': self.request_id},
'output_metadata': {
'assay': job_metadata['assay'],
'request_id': job_metadata['request_id'],
'tumor_igo_id': job_metadata['tumor']['igo_id'],
'tumor_patient_id': job_metadata['tumor']['patient_id'],
'normal_igo_id': job_metadata['normal']['igo_id'],
'normal_patient_id': job_metadata['normal']['patient_id'],
}
}
roslin_jobs.append((APIRunCreateSerializer(data = data), job))
return roslin_jobs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from uuid import UUID
from django.test import TestCase
from runner.operator.roslin_operator.construct_roslin_pair import construct_roslin_jobs
from runner.operator.roslin_operator.construct_roslin_pair import get_baits_and_targets
from runner.operator.roslin_operator.construct_roslin_pair import get_target_assay
from runner.operator.roslin_operator.construct_roslin_pair import InvalidAssay
from runner.operator.roslin_operator.bin.make_sample import build_sample
from file_system.models import File, FileMetadata, FileGroup, FileType
from django.conf import settings
Expand Down Expand Up @@ -55,5 +58,54 @@ def test_construct_roslin_jobs1(self):
samples.append(build_sample(igo_id_group[igo_id]))

roslin_inputs, error_samples = construct_roslin_jobs(samples)
# pprint(">>> roslin_inputs: ")
# print(json.dumps(roslin_inputs, indent = 4))
expected_inputs = json.load(open(os.path.join(settings.TEST_FIXTURE_DIR, "10075_D_single_TN_pair.roslin.input.json")))
self.assertTrue(roslin_inputs == expected_inputs)

def test_get_baits_and_targets1(self):
    """
    Test that the correct baits and targets are returned for a given assay
    """
    resources_path = "runner/operator/roslin_operator/reference_jsons/roslin_resources.json"
    # use a context manager so the file handle is closed as soon as the JSON is parsed
    with open(resources_path, 'rb') as fin:
        roslin_resources = json.load(fin)
    targets = roslin_resources['targets']

    # an unrecognized assay label raises InvalidAssay
    with self.assertRaises(InvalidAssay):
        get_baits_and_targets(assay = "foo", roslin_resources = roslin_resources)

    # known combinations of assay label pattern vs. true assay type to use for targets lookup
    combinations = [
        ("IMPACT410", "IMPACT410_b37"),
        ("IMPACT468", "IMPACT468_b37"),
        ("IMPACT341", "IMPACT341_b37"),
        ("IDT_Exome_v1_FP", "IDT_Exome_v1_FP_b37"),
        ("IMPACT468+08390", "IMPACT468_08390"),
        ("IMPACT468+Poirier_RB1_intron_V2", "IMPACT468_08050")
    ]

    for find_assay, target_assay in combinations:
        # subTest reports every failing assay label instead of stopping at the first one
        with self.subTest(assay = find_assay):
            entry = targets[target_assay]
            expected_targets = {
                "bait_intervals": {"class": "File", 'location': str(entry['baits_list'])},
                "target_intervals": {"class": "File", 'location': str(entry['targets_list'])},
                "fp_intervals": {"class": "File", 'location': str(entry['FP_intervals'])},
                "fp_genotypes": {"class": "File", 'location': str(entry['FP_genotypes'])}
            }
            self.assertEqual(
                get_baits_and_targets(assay = find_assay, roslin_resources = roslin_resources),
                expected_targets)

def test_get_target_assay1(self):
    """
    Test that the correct target assay label is returned for a given assay label,
    which might be different from the actual target assay to use
    """
    # an unrecognized assay label raises InvalidAssay
    with self.assertRaises(InvalidAssay):
        get_target_assay(assay = "foo")

    # known combinations of assay label pattern vs. true assay type to use for targets lookup
    combinations = [
        ("IMPACT410", "IMPACT410_b37"),
        ("IMPACT468", "IMPACT468_b37"),
        ("IMPACT341", "IMPACT341_b37"),
        ("IDT_Exome_v1_FP", "IDT_Exome_v1_FP_b37"),
        ("IMPACT468+08390", "IMPACT468_08390"),
        ("IMPACT468+Poirier_RB1_intron_V2", "IMPACT468_08050")
    ]
    for find_assay, target_assay in combinations:
        # subTest reports every failing assay label instead of stopping at the first one
        with self.subTest(assay = find_assay):
            self.assertEqual(get_target_assay(assay = find_assay), target_assay)
Empty file.
63 changes: 63 additions & 0 deletions runner/tests/serializers/test_serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Tests for serializers
"""
from django.test import TestCase
from uuid import UUID
from runner.serializers import APIRunCreateSerializer
from runner.models import Run

class TestSerializers(TestCase):
    """
    Tests that APIRunCreateSerializer creates Run instances from request-style data
    """
    fixtures = [
        "file_system.filegroup.json",
        "file_system.filetype.json",
        "file_system.storage.json",
        "runner.pipeline.json"
    ]

    def _save_run(self, data):
        """
        Validate `data` with APIRunCreateSerializer, save it, and return the single
        Run that exists in the database afterwards
        """
        serializer = APIRunCreateSerializer(data = data)
        # Surface validation errors directly; previously the is_valid() result was
        # ignored, so a bad payload only showed up as an opaque failure in save().
        # Assumes a Django REST Framework style serializer exposing `.errors` -- confirm.
        self.assertTrue(serializer.is_valid(), msg = serializer.errors)
        serializer.save()
        # get() raises unless exactly one Run exists, which doubles as a sanity check
        return Run.objects.get()

    def test_create_run_serializer1(self):
        """
        Test that the API Run Create Serializer works and creates a Run
        """
        # start with 0 runs in the database
        self.assertEqual(Run.objects.count(), 0)

        # data to pass to serializer
        data = {
            'app': 'cb5d793b-e650-4b7d-bfcd-882858e29cc5',
            'inputs': [],
            'name': 'ROSLIN 10075_D, 1 of 1',
            'tags': {'requestId': '10075_D'}
        }

        # run the serializer
        run_instance = self._save_run(data)

        # should be a Run in the database now
        self.assertEqual(Run.objects.count(), 1)

        self.assertEqual(run_instance.app_id, UUID('cb5d793b-e650-4b7d-bfcd-882858e29cc5'))
        self.assertTrue(run_instance.name.startswith(data['name']))
        self.assertEqual(run_instance.tags, {'requestId': '10075_D'})
        self.assertEqual(run_instance.status, 0)

    def test_create_run_with_output_metadata1(self):
        """
        Test that output_metadata propagates to the Run instance created
        """
        data = {
            'app': 'cb5d793b-e650-4b7d-bfcd-882858e29cc5',
            'inputs': [],
            'name': 'foo Run',
            'output_metadata': {'assay':'IMPACT486'}
        }
        run_instance = self._save_run(data)
        self.assertEqual(run_instance.app_id, UUID('cb5d793b-e650-4b7d-bfcd-882858e29cc5'))
        self.assertTrue(run_instance.name.startswith(data['name']))
        self.assertEqual(run_instance.status, 0)
        self.assertEqual(run_instance.output_metadata, data['output_metadata'])