From ac3b313d80523f86f61f873e97835655785e365f Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Thu, 20 Apr 2017 09:47:03 -0500 Subject: [PATCH 1/4] Add new parameter user_tool to iedb calls to leverage queueing capabilities --- pvacseq/lib/call_iedb.py | 1 + tests/test_call_iedb.py | 4 +++- tests/test_pvacseq.py | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pvacseq/lib/call_iedb.py b/pvacseq/lib/call_iedb.py index 0ff8dbf..c762463 100644 --- a/pvacseq/lib/call_iedb.py +++ b/pvacseq/lib/call_iedb.py @@ -65,6 +65,7 @@ def main(args_input = sys.argv[1:]): 'sequence_text': args.input_file.read(), 'method': args.method, 'allele': args.allele, + 'user_tool': 'pVac-seq', } if args.epitope_length is not None: data['length'] = args.epitope_length diff --git a/tests/test_call_iedb.py b/tests/test_call_iedb.py index 9a48d48..f02fee4 100644 --- a/tests/test_call_iedb.py +++ b/tests/test_call_iedb.py @@ -81,7 +81,8 @@ def test_iedb_methods_generate_expected_files(self): 'sequence_text':reader.read(), 'method': method, 'allele': self.allele, - 'length': self.epitope_length + 'length': self.epitope_length, + 'user_tool': 'pVac-seq', }) reader.close() expected_output_file = os.path.join(self.test_data_dir, 'output_%s.tsv' % method) @@ -113,6 +114,7 @@ def test_iedb_methods_generate_expected_files(self): 'sequence_text':reader.read(), 'method': method, 'allele': self.allele, + 'user_tool': 'pVac-seq', }) reader.close() expected_output_file = os.path.join(self.test_data_dir, 'output_%s.tsv' % method) diff --git a/tests/test_pvacseq.py b/tests/test_pvacseq.py index fca41a8..6d021be 100644 --- a/tests/test_pvacseq.py +++ b/tests/test_pvacseq.py @@ -63,6 +63,7 @@ def generate_class_i_call(method, allele, length, path, input_path): 'method': method, 'allele': allele, 'length': length, + 'user_tool': 'pVac-seq', }) def generate_class_ii_call(method, allele, path, input_path): @@ -78,6 +79,7 @@ def generate_class_ii_call(method, allele, path, input_path): 'sequence_text': ""+text, 'method': method, 'allele': allele, + 'user_tool': 'pVac-seq', }) class PVACTests(unittest.TestCase): From 761254283cb64c996e761343172bdf7f689d3b09 Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Mon, 26 Jun 2017 13:52:35 -0500 Subject: [PATCH 2/4] Remove unnecessary line of code --- pvacseq/lib/binding_filter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pvacseq/lib/binding_filter.py b/pvacseq/lib/binding_filter.py index b54a3b7..166ae1d 100644 --- a/pvacseq/lib/binding_filter.py +++ b/pvacseq/lib/binding_filter.py @@ -56,7 +56,6 @@ def main(args_input = sys.argv[1:]): writer.writeheader() for entry in reader: - name = entry['Gene Name'] if args.top_score_metric == 'median': score = float(entry['Median MT Score']) fold_change = sys.maxsize if entry['Median Fold Change'] == 'NA' else float(entry['Median Fold Change']) From ad930b0caa7df69dcbdb8c8886c35847e51f7263 Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Mon, 28 Aug 2017 15:44:10 -0500 Subject: [PATCH 3/4] Fix bug with handling certain frameshift variants --- pvacseq/lib/generate_fasta.py | 5 ++++- ...nput_frameshift_variant_position_shift.tsv | 2 ++ ...ut_frameshift_variant_position_shift.fasta | 4 ++++ ...tput_frameshift_variant_position_shift.key | 4 ++++ tests/test_generate_fasta.py | 19 +++++++++++++++++++ 5 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv create mode 100644 tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta create mode 100644 tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key diff --git a/pvacseq/lib/generate_fasta.py b/pvacseq/lib/generate_fasta.py index 2801e58..e913718 100644 --- a/pvacseq/lib/generate_fasta.py +++ b/pvacseq/lib/generate_fasta.py @@ -93,7 +93,10 @@ def main(args_input = sys.argv[1:]): variant_type = line['variant_type'] full_wildtype_sequence = line['wildtype_amino_acid_sequence'] if variant_type == 'FS': - position = int(line['protein_position'].split('-', 1)[0]) - 1 + if line['amino_acid_change'] is not None and line['amino_acid_change'].split('/')[0] == '-': + position = int(line['protein_position'].split('-', 1)[0]) + else: + position = int(line['protein_position'].split('-', 1)[0]) - 1 elif variant_type == 'missense' or variant_type == 'inframe_ins': wildtype_amino_acid, mutant_amino_acid = line['amino_acid_change'].split('/') if wildtype_amino_acid == '-': diff --git a/tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv b/tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv new file mode 100644 index 0000000..5229106 --- /dev/null +++ b/tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv @@ -0,0 +1,2 @@ +chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index +12 62381609 62381609 G GT USP15 ENST00000280377 -/X ENSG00000135655 MAEGGAADLDTQRSDIATLLKTSLRKGDTWYLVDSRWFKQWKKYVGFDSWDKYQMGDQNVYPGPIDNSGLLKDGDAQSLKEHLIDELDYILLPTEGWNKLVSWYTLMEGQEPIARKVVEQGMFVKHCKVEVYLTELKLCENGNMNNVVTRRFSKADTIDTIEKEIRKIFSIPDEKETRLWNKYMSNTFEPLNKPDSTIQDAGLYQGQVLVIEQKNEDGTWPRGPSTPKSPGASNFSTLPKISPSSLSNNYNNMNNRNVKNSNYCLPSYTAYKNYDYSEPGRNNEQPGLCGLSNLGNTCFMNSAIQCLSNTPPLTEYFLNDKYQEELNFDNPLGMRGEIAKSYAELIKQMWSGKFSYVTPRAFKTQVGRFAPQFSGYQQQDCQELLAFLLDGLHEDLNRIRKKPYIQLKDADGRPDKVVAEEAWENHLKRNDSIIVDIFHGLFKSTLVCPECAKISVTFDPFCYLTLPLPMKKERTLEVYLVRMDPLTKPMQYKVVVPKIGNILDLCTALSALSGIPADKMIVTDIYNHRFHRIFAMDENLSSIMERDDIYVFEININRTEDTEHVIIPVCLREKFRHSSYTHHTGSSLFGQPFLMAVPRNNTEDKLYNLLLLRMCRYVKISTETEETEGSLHCCKDQNINGNGPNGIHEEGSPSEMETDEPDDESSQDQELPSENENSQSEDSVGGDNDSENGLCTEDTCKGQLTGHKKRLFTFQFNNLGNTDINYIKDDTRHIRFDDRQLRLDERSFLALDWDPDLKKRYFDENAAEDFEKHESVEYKPPKKPFVKLKDCIELFTTKEKLGAEDPWYCPNCKEHQQATKKLDLWSLPPVLVVHLKRFSYSRYMRDKLDTLVDFPINDLDMSEFLINPNAGPCRYNLIAVSNHYGGMGGGHYTAFAKNKDDGKWYYFDDSSVSTASEDQIVSKAAYVLFYQRQDTFSGTGFFPLDRETKGASAATGIPLESDEDSNDNDNDIENENCMHTN YQANVVWKV FS 345-346 NA NA NA NA NA NA NA NA USP15_ENST00000280377_1.FS.345-346 diff --git a/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta new file mode 100644 index 0000000..f657d8d --- /dev/null +++ b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta @@ -0,0 +1,4 @@ +>1 +GEIAKSYAELIKQMWSGKFS +>2 +GEIAKSYAELYQANVVWKV diff --git a/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key new file mode 100644 index 0000000..d8dcf63 --- /dev/null +++ b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key @@ -0,0 +1,4 @@ +1: +- WT.USP15_ENST00000280377_1.FS.345-346 +2: +- MT.USP15_ENST00000280377_1.FS.345-346 diff --git a/tests/test_generate_fasta.py b/tests/test_generate_fasta.py index c7853aa..533aead 100644 --- a/tests/test_generate_fasta.py +++ b/tests/test_generate_fasta.py @@ -365,6 +365,25 @@ def test_input_file_with_frameshift_variant_range_generates_expected_file(self): expected_key_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_range.key') self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file)) + def test_input_file_with_frameshift_variant_position_shift_generates_expected_file(self): + generate_fasta_input_file = os.path.join(self.test_data_dir, 'input_frameshift_variant_position_shift.tsv') + generate_fasta_output_file = tempfile.NamedTemporaryFile() + generate_fasta_key_output_file = tempfile.NamedTemporaryFile() + + self.assertFalse(call([ + self.python, + self.executable, + generate_fasta_input_file, + self.peptide_sequence_length, + self.epitope_length, + generate_fasta_output_file.name, + generate_fasta_key_output_file.name, + ], shell=False)) + expected_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_position_shift.fasta') + self.assertTrue(cmp(generate_fasta_output_file.name, expected_output_file)) + expected_key_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_position_shift.key') + self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file)) + def test_input_file_with_sequence_containing_asterisk(self): generate_fasta_input_file = os.path.join(self.test_data_dir, 'input_asterisk_sequence.tsv') generate_fasta_output_file = tempfile.NamedTemporaryFile() From d2b82162a41eef2b8ffe0347bb310ca325707e2e Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Mon, 28 Aug 2017 12:33:16 -0500 Subject: [PATCH 4/4] Die if the wildtype amino acid in the wildtype sequence is different from the one in the amino acid change --- pvacseq/lib/generate_fasta.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pvacseq/lib/generate_fasta.py b/pvacseq/lib/generate_fasta.py index 2801e58..194a66c 100644 --- a/pvacseq/lib/generate_fasta.py +++ b/pvacseq/lib/generate_fasta.py @@ -129,6 +129,8 @@ def main(args_input = sys.argv[1:]): else: mutation_start_position, wildtype_subsequence = get_wildtype_subsequence(position, full_wildtype_sequence, wildtype_amino_acid_length, peptide_sequence_length, line) mutation_end_position = mutation_start_position + wildtype_amino_acid_length + if wildtype_amino_acid != '-' and wildtype_amino_acid != wildtype_subsequence[mutation_start_position:mutation_end_position]: + sys.exit("ERROR: There was a mismatch between the actual wildtype amino acid and the expected amino acid. Did you use the same reference build version for VEP that you used for creating the VCF?\n%s" % line) mutant_subsequence = wildtype_subsequence[:mutation_start_position] + mutant_amino_acid + wildtype_subsequence[mutation_end_position:] if '*' in wildtype_subsequence or '*' in mutant_subsequence: